404 files changed, 89717 insertions, 76777 deletions
diff --git a/drivers/SCsub b/drivers/SCsub
index 3028139f50..bc46bf2cec 100644
--- a/drivers/SCsub
+++ b/drivers/SCsub
@@ -31,15 +31,17 @@ SConscript("rtaudio/SCsub");
 SConscript("nedmalloc/SCsub");
 SConscript("nrex/SCsub");
 SConscript("chibi/SCsub");
-if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theora"]=="yes"):
+if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theora"]=="yes" or env["opus"]=="yes"):
         SConscript("ogg/SCsub");
 if (env["vorbis"]=="yes"):
         SConscript("vorbis/SCsub");
+if (env["opus"]=="yes"):
+		SConscript('opus/SCsub');
 if (env["tools"]=="yes"):
 	SConscript("convex_decomp/SCsub");
 
-if env["theora"]=="yes":
-	SConscript("theoraplayer/SCsub")
+#if env["theora"]=="yes":
+#	SConscript("theoraplayer/SCsub")
 if (env["theora"]=="yes"):
 	SConscript("theora/SCsub");
 if (env['speex']=='yes'):
diff --git a/drivers/convex_decomp/b2Glue.h b/drivers/convex_decomp/b2Glue.h
index db765f7eb9..7ec6d7f181 100644
--- a/drivers/convex_decomp/b2Glue.h
+++ b/drivers/convex_decomp/b2Glue.h
@@ -20,7 +20,8 @@
 #define B2GLUE_H
 
 #include "math_2d.h"
-#include <limits>
+#include <limits.h>
+
 namespace b2ConvexDecomp {
 
 typedef real_t float32;
diff --git a/drivers/convex_decomp/b2Polygon.cpp b/drivers/convex_decomp/b2Polygon.cpp
index 668313967e..775f2adfe2 100644
--- a/drivers/convex_decomp/b2Polygon.cpp
+++ b/drivers/convex_decomp/b2Polygon.cpp
@@ -21,8 +21,8 @@
 #include "b2Triangle.h"
 #include "b2Polygon.h"
 
-#include <cmath>
-#include <climits>
+#include <math.h>
+#include <limits.h>
 #include <assert.h>
 #define b2Assert assert
 
diff --git a/drivers/convex_decomp/b2Polygon.h b/drivers/convex_decomp/b2Polygon.h
index 82cdc56804..36af2fd9d0 100644
--- a/drivers/convex_decomp/b2Polygon.h
+++ b/drivers/convex_decomp/b2Polygon.h
@@ -22,7 +22,7 @@
 #include "b2Triangle.h"
 #include "stdio.h"
 #include <string.h>
-#include <limits>
+#include <limits.h>
 namespace b2ConvexDecomp {
 
 static bool B2_POLYGON_REPORT_ERRORS = false;
diff --git a/drivers/etc1/rg_etc1.cpp b/drivers/etc1/rg_etc1.cpp
index fd109f003c..47dcb57e6b 100644
--- a/drivers/etc1/rg_etc1.cpp
+++ b/drivers/etc1/rg_etc1.cpp
@@ -1,2454 +1,2454 @@
-// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
-// Please see ZLIB license at the end of rg_etc1.h.
-//
-// For more information Ericsson Texture Compression (ETC/ETC1), see:
-// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
-//
-// v1.03 - 5/12/13 - Initial public release
-#include "rg_etc1.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-//#include <stdio.h>
-#include <math.h>
-#include <stdio.h>
-#pragma warning (disable: 4201) //  nonstandard extension used : nameless struct/union
-
-#if defined(_DEBUG) || defined(DEBUG)
-#define RG_ETC1_BUILD_DEBUG
-#endif
-
-#define RG_ETC1_ASSERT assert
-
-namespace rg_etc1
-{
-
-   inline long labs(long val) {
-        return val < 0 ? -val : val;
-   }
-
-   inline int intabs(int val) {
-
-       return val<0?-val:val;
-   }
-
-   typedef unsigned char uint8;
-   typedef unsigned short uint16;
-   typedef unsigned int uint;
-   typedef unsigned int uint32;
-   typedef long long int64;
-   typedef unsigned long long uint64;
-
-   const uint32 cUINT32_MAX = 0xFFFFFFFFU;
-   const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64;
-   
-   template<typename T> inline T minimum(T a, T b) { return (a < b) ? a : b; }
-   template<typename T> inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); }
-   template<typename T> inline T maximum(T a, T b) { return (a > b) ? a : b; }
-   template<typename T> inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); }
-   template<typename T> inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); }
-   template<typename T> inline T square(T value) { return value * value; }
-   template<typename T> inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); }
-   template<typename T> inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); }
-
-   template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]);   
-
-#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X)))
-
-   enum eNoClamp { cNoClamp };
-
-   struct color_quad_u8
-   {
-      static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast<int>(v) >> 31)) & 0xFF; return v; }
-
-      struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; };
-
-   public:
-      typedef unsigned char component_t;
-      typedef int parameter_t;
-
-      enum { cNumComps = 4 };
-
-      union
-      {
-         struct
-         {
-            component_t r;
-            component_t g;
-            component_t b;
-            component_t a;
-         };
-
-         component_t c[cNumComps];
-
-         uint32 m_u32;
-      };
-
-      inline color_quad_u8()
-      {
-      }
-
-      inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32)
-      {
-      }
-
-      explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax)
-      {
-         set(y, alpha);
-      }
-
-      inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
-      {
-         set(red, green, blue, alpha);
-      }
-
-      explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax)
-      {
-         set_noclamp_y_alpha(y, alpha);
-      }
-
-      inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
-      {
-         set_noclamp_rgba(red, green, blue, alpha);
-      }
-
-      inline void clear()
-      {
-         m_u32 = 0;
-      }
-
-      inline color_quad_u8& operator= (const color_quad_u8& other)
-      {
-         m_u32 = other.m_u32;
-         return *this;
-      }
-
-      inline color_quad_u8& set_rgb(const color_quad_u8& other)
-      {
-         r = other.r;
-         g = other.g;
-         b = other.b;
-         return *this;
-      }
-
-      inline color_quad_u8& operator= (parameter_t y)
-      {
-         set(y, component_traits::cMax);
-         return *this;
-      }
-
-      inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax)
-      {
-         y = clamp(y);
-         alpha = clamp(alpha);
-         r = static_cast<component_t>(y);
-         g = static_cast<component_t>(y);
-         b = static_cast<component_t>(y);
-         a = static_cast<component_t>(alpha);
-         return *this;
-      }
-
-      inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax)
-      {
-         RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) );
-         RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
-
-         r = static_cast<component_t>(y);
-         g = static_cast<component_t>(y);
-         b = static_cast<component_t>(y);
-         a = static_cast<component_t>(alpha);
-         return *this;
-      }
-
-      inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
-      {
-         r = static_cast<component_t>(clamp(red));
-         g = static_cast<component_t>(clamp(green));
-         b = static_cast<component_t>(clamp(blue));
-         a = static_cast<component_t>(clamp(alpha));
-         return *this;
-      }
-
-      inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha)
-      {
-         RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
-         RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
-         RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
-         RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
-
-         r = static_cast<component_t>(red);
-         g = static_cast<component_t>(green);
-         b = static_cast<component_t>(blue);
-         a = static_cast<component_t>(alpha);
-         return *this;
-      }
-
-      inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue)
-      {
-         RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
-         RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
-         RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
-
-         r = static_cast<component_t>(red);
-         g = static_cast<component_t>(green);
-         b = static_cast<component_t>(blue);
-         return *this;
-      }
-
-      static inline parameter_t get_min_comp() { return component_traits::cMin; }
-      static inline parameter_t get_max_comp() { return component_traits::cMax; }
-      static inline bool get_comps_are_signed() { return component_traits::cSigned; }
-
-      inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
-      inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
-
-      inline color_quad_u8& set_component(uint i, parameter_t f)
-      {
-         RG_ETC1_ASSERT(i < cNumComps);
-
-         c[i] = static_cast<component_t>(clamp(f));
-
-         return *this;
-      }
-
-      inline color_quad_u8& set_grayscale(parameter_t l)
-      {
-         component_t x = static_cast<component_t>(clamp(l));
-         c[0] = x;
-         c[1] = x;
-         c[2] = x;
-         return *this;
-      }
-
-      inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h)
-      {
-         for (uint i = 0; i < cNumComps; i++)
-            c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l[i], h[i]));
-         return *this;
-      }
-
-      inline color_quad_u8& clamp(parameter_t l, parameter_t h)
-      {
-         for (uint i = 0; i < cNumComps; i++)
-            c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l, h));
-         return *this;
-      }
-
-      // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y).
-      inline parameter_t get_luma() const
-      {
-         return static_cast<parameter_t>((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U);
-      }
-
-      // Returns REC 709 luma.
-      inline parameter_t get_luma_rec709() const
-      {
-         return static_cast<parameter_t>((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U);
-      }
-
-      inline uint squared_distance_rgb(const color_quad_u8& c) const
-      {
-         return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b);
-      }
-
-      inline uint squared_distance_rgba(const color_quad_u8& c) const
-      {
-         return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a);
-      }
-
-      inline bool rgb_equals(const color_quad_u8& rhs) const
-      {
-         return (r == rhs.r) && (g == rhs.g) && (b == rhs.b);
-      }
-
-      inline bool operator== (const color_quad_u8& rhs) const
-      {
-         return m_u32 == rhs.m_u32;
-      }
-
-      color_quad_u8& operator+= (const color_quad_u8& other)
-      {
-         for (uint i = 0; i < 4; i++)
-            c[i] = static_cast<component_t>(clamp(c[i] + other.c[i]));
-         return *this;
-      }
-
-      color_quad_u8& operator-= (const color_quad_u8& other)
-      {
-         for (uint i = 0; i < 4; i++)
-            c[i] = static_cast<component_t>(clamp(c[i] - other.c[i]));
-         return *this;
-      }
-
-      friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs)
-      {
-         color_quad_u8 result(lhs);
-         result += rhs;
-         return result;
-      }
-
-      friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs)
-      {
-         color_quad_u8 result(lhs);
-         result -= rhs;
-         return result;
-      }
-   }; // class color_quad_u8
-
-   struct vec3F
-   {
-      float m_s[3];
-      
-      inline vec3F() { }
-      inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; }
-      inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; }
-      
-      inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; }
-
-      inline vec3F& operator += (const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; }
-
-      inline vec3F& operator *= (float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; }
-   };
-     
-   enum etc_constants
-   {
-      cETC1BytesPerBlock = 8U,
-
-      cETC1SelectorBits = 2U,
-      cETC1SelectorValues = 1U << cETC1SelectorBits,
-      cETC1SelectorMask = cETC1SelectorValues - 1U,
-
-      cETC1BlockShift = 2U,
-      cETC1BlockSize = 1U << cETC1BlockShift,
-
-      cETC1LSBSelectorIndicesBitOffset = 0,
-      cETC1MSBSelectorIndicesBitOffset = 16,
-
-      cETC1FlipBitOffset = 32,
-      cETC1DiffBitOffset = 33,
-
-      cETC1IntenModifierNumBits = 3,
-      cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
-      cETC1RightIntenModifierTableBitOffset = 34,
-      cETC1LeftIntenModifierTableBitOffset = 37,
-
-      // Base+Delta encoding (5 bit bases, 3 bit delta)
-      cETC1BaseColorCompNumBits = 5,
-      cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,
-
-      cETC1DeltaColorCompNumBits = 3,
-      cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
-      cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
-
-      cETC1BaseColor5RBitOffset = 59,
-      cETC1BaseColor5GBitOffset = 51,
-      cETC1BaseColor5BBitOffset = 43,
-
-      cETC1DeltaColor3RBitOffset = 56,
-      cETC1DeltaColor3GBitOffset = 48,
-      cETC1DeltaColor3BBitOffset = 40,
-
-      // Absolute (non-delta) encoding (two 4-bit per component bases)
-      cETC1AbsColorCompNumBits = 4,
-      cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,
-
-      cETC1AbsColor4R1BitOffset = 60,
-      cETC1AbsColor4G1BitOffset = 52,
-      cETC1AbsColor4B1BitOffset = 44,
-
-      cETC1AbsColor4R2BitOffset = 56,
-      cETC1AbsColor4G2BitOffset = 48,
-      cETC1AbsColor4B2BitOffset = 40,
-
-      cETC1ColorDeltaMin = -4,
-      cETC1ColorDeltaMax = 3,
-
-      // Delta3:
-      // 0   1   2   3   4   5   6   7
-      // 000 001 010 011 100 101 110 111
-      // 0   1   2   3   -4  -3  -2  -1
-   };
-   
-   static uint8 g_quant5_tab[256+16];
-
-
-   static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = 
-   { 
-      { -8,  -2,   2,   8 }, { -17,  -5,  5,  17 }, { -29,  -9,   9,  29 }, {  -42, -13, 13,  42 }, 
-      { -60, -18, 18,  60 }, { -80, -24, 24,  80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } 
-   };
-
-   static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
-   static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
-      
-   // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte.
-   static uint16 g_etc1_inverse_lookup[2*8*4][256];      // [diff/inten_table/selector][desired_color]
-
-   // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color.
-   // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8)
-   static const uint16 g_color8_to_etc_block_config_0_255[2][33] =
-   {
-      { 0x0000,  0x0010,  0x0002,  0x0012,  0x0004,  0x0014,  0x0006,  0x0016,  0x0008,  0x0018,  0x000A,  0x001A,  0x000C,  0x001C,  0x000E,  0x001E,
-        0x0001,  0x0011,  0x0003,  0x0013,  0x0005,  0x0015,  0x0007,  0x0017,  0x0009,  0x0019,  0x000B,  0x001B,  0x000D,  0x001D,  0x000F,  0x001F, 0xFFFF },
-      { 0x0F20,  0x0F30,  0x0E32,  0x0F22,  0x0E34,  0x0F24,  0x0D36,  0x0F26,  0x0C38,  0x0E28,  0x0B3A,  0x0E2A,  0x093C,  0x0E2C,  0x053E,  0x0D2E,
-        0x1E31,  0x1F21,  0x1D33,  0x1F23,  0x1C35,  0x1E25,  0x1A37,  0x1E27,  0x1839,  0x1D29,  0x163B,  0x1C2B,  0x133D,  0x1B2D,  0x093F,  0x1A2F, 0xFFFF },
-   };
-
-   // Really only [254][11].
-   static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = 
-   {
-      { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E,
-      0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, {
-      0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306,
-      0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112,
-      0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707,
-      0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B,
-      0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605,
-      0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF
-      }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214,
-      0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A,
-      0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, {
-      0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B,
-      0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D,
-      0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805,
-      0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F,
-      0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, {
-      0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521,
-      0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523,
-      0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F,
-      0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B,
-      0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, {
-      0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F,
-      0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D,
-      0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529,
-      0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917,
-      0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E,
-      0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725,
-      0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139,
-      0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, {
-      0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A,
-      0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437,
-      0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500,
-      0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, {
-      0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19,
-      0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D,
-      0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, {
-      0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F,
-      0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D,
-      0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, {
-	      0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05,
-      0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434,
-      0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01,
-      0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21,
-      0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27,
-      0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E,
-      0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D,
-      0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, {
-      0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, {
-      0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307,
-      0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33,
-      0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B,
-      0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, {
-      0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103,
-      0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B,
-      0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536,
-      0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A,
-      0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115,
-      0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, {
-      0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF
-      }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820,
-      0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031,
-      0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, {
-      0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35,
-      0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F,
-      0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D,
-      0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029,
-      0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832,
-      0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D,
-      0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133,
-      0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF
-      }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, {
-      0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331,
-      0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D,
-      0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513,
-      0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF
-      }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, {
-      0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, {
-      0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905,
-      0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09,
-      0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D,
-      0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621,
-      0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18,
-      0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919,
-      0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625,
-      0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F,
-      0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936,
-      0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A,
-      0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, {
-      0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913,
-      0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, {
-      0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20,
-      0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C,
-      0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, {
-      0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06,
-      0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, {
-      0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26,
-      0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18,
-      0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03,
-      0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929,
-      0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23,
-      0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF
-      }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B,
-      0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E,
-      0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18,
-      0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01,
-      0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16,
-      0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B,
-      0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01,
-      0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34,
-      0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11,
-      0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF },
-   };
-
-   struct etc1_block
-   {
-      // big endian uint64:
-      // bit ofs:  56  48  40  32  24  16   8   0
-      // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 
-      union 
-      {
-         uint64 m_uint64;
-         uint8 m_bytes[8];
-      };
-
-      uint8 m_low_color[2];
-      uint8 m_high_color[2];
-
-      enum { cNumSelectorBytes = 4 };
-      uint8 m_selectors[cNumSelectorBytes];
-
-      inline void clear()
-      {
-         zero_this(this);
-      }
-
-      inline uint get_byte_bits(uint ofs, uint num) const
-      {
-         RG_ETC1_ASSERT((ofs + num) <= 64U);
-         RG_ETC1_ASSERT(num && (num <= 8U));
-         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
-         const uint byte_ofs = 7 - (ofs >> 3);
-         const uint byte_bit_ofs = ofs & 7;
-         return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
-      }
-
-      inline void set_byte_bits(uint ofs, uint num, uint bits)
-      {
-         RG_ETC1_ASSERT((ofs + num) <= 64U);
-         RG_ETC1_ASSERT(num && (num < 32U));
-         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
-         RG_ETC1_ASSERT(bits < (1U << num));
-         const uint byte_ofs = 7 - (ofs >> 3);
-         const uint byte_bit_ofs = ofs & 7;
-         const uint mask = (1 << num) - 1;
-         m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
-         m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
-      }
-
-      // false = left/right subblocks
-      // true = upper/lower subblocks
-      inline bool get_flip_bit() const 
-      {
-         return (m_bytes[3] & 1) != 0;
-      }   
-
-      inline void set_flip_bit(bool flip)
-      {
-         m_bytes[3] &= ~1;
-         m_bytes[3] |= static_cast<uint8>(flip);
-      }
-
-      inline bool get_diff_bit() const
-      {
-         return (m_bytes[3] & 2) != 0;
-      }
-
-      inline void set_diff_bit(bool diff)
-      {
-         m_bytes[3] &= ~2;
-         m_bytes[3] |= (static_cast<uint>(diff) << 1);
-      }
-
-      // Returns intensity modifier table (0-7) used by subblock subblock_id.
-      // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2)
-      inline uint get_inten_table(uint subblock_id) const
-      {
-         RG_ETC1_ASSERT(subblock_id < 2);
-         const uint ofs = subblock_id ? 2 : 5;
-         return (m_bytes[3] >> ofs) & 7;
-      }
-
-      // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
-      inline void set_inten_table(uint subblock_id, uint t)
-      {
-         RG_ETC1_ASSERT(subblock_id < 2);
-         RG_ETC1_ASSERT(t < 8);
-         const uint ofs = subblock_id ? 2 : 5;
-         m_bytes[3] &= ~(7 << ofs);
-         m_bytes[3] |= (t << ofs);
-      }
-
-      // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
-      inline uint get_selector(uint x, uint y) const
-      {
-         RG_ETC1_ASSERT((x | y) < 4);
-
-         const uint bit_index = x * 4 + y;
-         const uint byte_bit_ofs = bit_index & 7;
-         const uint8 *p = &m_bytes[7 - (bit_index >> 3)];
-         const uint lsb = (p[0] >> byte_bit_ofs) & 1;
-         const uint msb = (p[-2] >> byte_bit_ofs) & 1;
-         const uint val = lsb | (msb << 1);
-
-         return g_etc1_to_selector_index[val];
-      }
-
-      // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
-      inline void set_selector(uint x, uint y, uint val)
-      {
-         RG_ETC1_ASSERT((x | y | val) < 4);
-         const uint bit_index = x * 4 + y;
-
-         uint8 *p = &m_bytes[7 - (bit_index >> 3)];
-
-         const uint byte_bit_ofs = bit_index & 7;
-         const uint mask = 1 << byte_bit_ofs;
-
-         const uint etc1_val = g_selector_index_to_etc1[val];
-
-         const uint lsb = etc1_val & 1;
-         const uint msb = etc1_val >> 1;
-
-         p[0] &= ~mask;
-         p[0] |= (lsb << byte_bit_ofs);
-
-         p[-2] &= ~mask;
-         p[-2] |= (msb << byte_bit_ofs);
-      }
-
-      inline void set_base4_color(uint idx, uint16 c)
-      {
-         if (idx)
-         {
-            set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
-            set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
-            set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
-         }
-         else
-         {
-            set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
-            set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
-            set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
-         }
-      }
-
-      inline uint16 get_base4_color(uint idx) const
-      {
-         uint r, g, b;
-         if (idx)
-         {
-            r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
-            g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
-            b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
-         }
-         else
-         {
-            r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
-            g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
-            b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
-         }
-         return static_cast<uint16>(b | (g << 4U) | (r << 8U));
-      }
-
-      inline void set_base5_color(uint16 c)
-      {
-         set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
-         set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
-         set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
-      }
-
-      inline uint16 get_base5_color() const
-      {
-         const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
-         const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
-         const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
-         return static_cast<uint16>(b | (g << 5U) | (r << 10U));
-      }
-
-      void set_delta3_color(uint16 c)
-      {
-         set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
-         set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
-         set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
-      }
-
-      inline uint16 get_delta3_color() const
-      {
-         const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
-         const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
-         const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
-         return static_cast<uint16>(b | (g << 3U) | (r << 6U));
-      }
-
-      // Base color 5
-      static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
-      static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);
-
-      static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
-      static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled);
-
-      static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
-      static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
-
-      // Delta color 3
-      // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
-      static uint16 pack_delta3(int r, int g, int b);
-
-      // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
-      static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);
-
-      // Abs color 4
-      static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
-      static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);
-
-      static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
-      static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);
-
-      // subblock colors
-      static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
-      static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
-      static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);
-
-      static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4)
-      {
-         if (color4)
-         {
-            dst.r = src.r | (src.r << 4);
-            dst.g = src.g | (src.g << 4);
-            dst.b = src.b | (src.b << 4);
-         }
-         else
-         {
-            dst.r = (src.r >> 2) | (src.r << 3);
-            dst.g = (src.g >> 2) | (src.g << 3);
-            dst.b = (src.b >> 2) | (src.b << 3);
-         }
-         dst.a = src.a;
-      }
-   };
-
-   // Returns pointer to sorted array.
-   template<typename T, typename Q>
-   T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices)
-   {  
-      RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T)));
-      RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4));
-
-      if (init_indices)
-      {
-         T* p = pIndices0;
-         T* q = pIndices0 + (num_indices >> 1) * 2;
-         uint i;
-         for (i = 0; p != q; p += 2, i += 2)
-         {
-            p[0] = static_cast<T>(i);
-            p[1] = static_cast<T>(i + 1); 
-         }
-
-         if (num_indices & 1)
-            *p = static_cast<T>(i);
-      }
-
-      uint hist[256 * 4];
-
-      memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
-
-#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs))
-#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs))
-
-      if (key_size == 4)
-      {
-         T* p = pIndices0;
-         T* q = pIndices0 + num_indices;
-         for ( ; p != q; p++)
-         {
-            const uint key = RG_ETC1_GET_KEY(p);
-
-            hist[        key        & 0xFF]++;
-            hist[256 + ((key >>  8) & 0xFF)]++;
-            hist[512 + ((key >> 16) & 0xFF)]++;
-            hist[768 + ((key >> 24) & 0xFF)]++;
-         }
-      }
-      else if (key_size == 3)
-      {
-         T* p = pIndices0;
-         T* q = pIndices0 + num_indices;
-         for ( ; p != q; p++)
-         {
-            const uint key = RG_ETC1_GET_KEY(p);
-
-            hist[        key        & 0xFF]++;
-            hist[256 + ((key >>  8) & 0xFF)]++;
-            hist[512 + ((key >> 16) & 0xFF)]++;
-         }
-      }   
-      else if (key_size == 2)
-      {
-         T* p = pIndices0;
-         T* q = pIndices0 + (num_indices >> 1) * 2;
-
-         for ( ; p != q; p += 2)
-         {
-            const uint key0 = RG_ETC1_GET_KEY(p);
-            const uint key1 = RG_ETC1_GET_KEY(p+1);
-
-            hist[        key0         & 0xFF]++;
-            hist[256 + ((key0 >>  8) & 0xFF)]++;
-
-            hist[        key1        & 0xFF]++;
-            hist[256 + ((key1 >>  8) & 0xFF)]++;
-         }
-
-         if (num_indices & 1)
-         {
-            const uint key = RG_ETC1_GET_KEY(p);
-
-            hist[        key        & 0xFF]++;
-            hist[256 + ((key >>  8) & 0xFF)]++;
-         }
-      }      
-      else
-      {
-         RG_ETC1_ASSERT(key_size == 1);
-         if (key_size != 1)
-            return NULL;
-
-         T* p = pIndices0;
-         T* q = pIndices0 + (num_indices >> 1) * 2;
-
-         for ( ; p != q; p += 2)
-         {
-            const uint key0 = RG_ETC1_GET_KEY(p);
-            const uint key1 = RG_ETC1_GET_KEY(p+1);
-
-            hist[key0 & 0xFF]++;
-            hist[key1 & 0xFF]++;
-         }
-
-         if (num_indices & 1)
-         {
-            const uint key = RG_ETC1_GET_KEY(p);
-
-            hist[key & 0xFF]++;
-         }
-      }      
-
-      T* pCur = pIndices0;
-      T* pNew = pIndices1;
-
-      for (uint pass = 0; pass < key_size; pass++)
-      {
-         const uint* pHist = &hist[pass << 8];
-
-         uint offsets[256];
-
-         uint cur_ofs = 0;
-         for (uint i = 0; i < 256; i += 2)
-         {
-            offsets[i] = cur_ofs;
-            cur_ofs += pHist[i];
-
-            offsets[i+1] = cur_ofs;
-            cur_ofs += pHist[i+1];
-         }
-
-         const uint pass_shift = pass << 3;
-
-         T* p = pCur;
-         T* q = pCur + (num_indices >> 1) * 2;
-
-         for ( ; p != q; p += 2)
-         {
-            uint index0 = p[0];
-            uint index1 = p[1];
-
-            uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF;
-            uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF;
-
-            if (c0 == c1)
-            {
-               uint dst_offset0 = offsets[c0];
-
-               offsets[c0] = dst_offset0 + 2;
-
-               pNew[dst_offset0] = static_cast<T>(index0);
-               pNew[dst_offset0 + 1] = static_cast<T>(index1);
-            }
-            else
-            {
-               uint dst_offset0 = offsets[c0]++;
-               uint dst_offset1 = offsets[c1]++;
-
-               pNew[dst_offset0] = static_cast<T>(index0);
-               pNew[dst_offset1] = static_cast<T>(index1);
-            }
-         }
-
-         if (num_indices & 1)
-         {
-            uint index = *p;
-            uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF;
-
-            uint dst_offset = offsets[c];
-            offsets[c] = dst_offset + 1;
-
-            pNew[dst_offset] = static_cast<T>(index);
-         }
-
-         T* t = pCur;
-         pCur = pNew;
-         pNew = t;
-      }            
-
-      return pCur;
-   }
-
-#undef RG_ETC1_GET_KEY
-#undef RG_ETC1_GET_KEY_FROM_INDEX
-
-   uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias)
-   {
-      return pack_color5(color.r, color.g, color.b, scaled, bias);
-   }
-   
-   uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias)
-   {
-      if (scaled)
-      {
-         r = (r * 31U + bias) / 255U;
-         g = (g * 31U + bias) / 255U;
-         b = (b * 31U + bias) / 255U;
-      }
-
-      r = rg_etc1::minimum(r, 31U);
-      g = rg_etc1::minimum(g, 31U);
-      b = rg_etc1::minimum(b, 31U);
-
-      return static_cast<uint16>(b | (g << 5U) | (r << 10U));
-   }
-
-   color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha)
-   {
-      uint b = packed_color5 & 31U;
-      uint g = (packed_color5 >> 5U) & 31U;
-      uint r = (packed_color5 >> 10U) & 31U;
-
-      if (scaled)
-      {
-         b = (b << 3U) | (b >> 2U);
-         g = (g << 3U) | (g >> 2U);
-         r = (r << 3U) | (r >> 2U);
-      }
-
-      return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U));
-   }
-
-   void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled)
-   {
-      color_quad_u8 c(unpack_color5(packed_color5, scaled, 0));
-      r = c.r;
-      g = c.g;
-      b = c.b;
-   }
-
-   bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
-   {
-      int dc_r, dc_g, dc_b;
-      unpack_delta3(dc_r, dc_g, dc_b, packed_delta3);
-      
-      int b = (packed_color5 & 31U) + dc_b;
-      int g = ((packed_color5 >> 5U) & 31U) + dc_g;
-      int r = ((packed_color5 >> 10U) & 31U) + dc_r;
-
-      bool success = true;
-      if (static_cast<uint>(r | g | b) > 31U)
-      {
-         success = false;
-         r = rg_etc1::clamp<int>(r, 0, 31);
-         g = rg_etc1::clamp<int>(g, 0, 31);
-         b = rg_etc1::clamp<int>(b, 0, 31);
-      }
-
-      if (scaled)
-      {
-         b = (b << 3U) | (b >> 2U);
-         g = (g << 3U) | (g >> 2U);
-         r = (r << 3U) | (r >> 2U);
-      }
-
-      result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U));
-      return success;
-   }
-
-   bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
-   {
-      color_quad_u8 result;
-      const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha);
-      r = result.r;
-      g = result.g;
-      b = result.b;
-      return success;
-   }
-     
-   uint16 etc1_block::pack_delta3(int r, int g, int b)
-   {
-      RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
-      RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
-      RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
-      if (r < 0) r += 8;
-      if (g < 0) g += 8;
-      if (b < 0) b += 8;
-      return static_cast<uint16>(b | (g << 3) | (r << 6));
-   }
-   
-   void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3)
-   {
-      r = (packed_delta3 >> 6) & 7;
-      g = (packed_delta3 >> 3) & 7;
-      b = packed_delta3 & 7;
-      if (r >= 4) r -= 8;
-      if (g >= 4) g -= 8;
-      if (b >= 4) b -= 8;
-   }
-
-   uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias)
-   {
-      return pack_color4(color.r, color.g, color.b, scaled, bias);
-   }
-   
-   uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias)
-   {
-      if (scaled)
-      {
-         r = (r * 15U + bias) / 255U;
-         g = (g * 15U + bias) / 255U;
-         b = (b * 15U + bias) / 255U;
-      }
-
-      r = rg_etc1::minimum(r, 15U);
-      g = rg_etc1::minimum(g, 15U);
-      b = rg_etc1::minimum(b, 15U);
-
-      return static_cast<uint16>(b | (g << 4U) | (r << 8U));
-   }
-
-   color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha)
-   {
-      uint b = packed_color4 & 15U;
-      uint g = (packed_color4 >> 4U) & 15U;
-      uint r = (packed_color4 >> 8U) & 15U;
-
-      if (scaled)
-      {
-         b = (b << 4U) | b;
-         g = (g << 4U) | g;
-         r = (r << 4U) | r;
-      }
-
-      return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U));
-   }
-   
-   void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled)
-   {
-      color_quad_u8 c(unpack_color4(packed_color4, scaled, 0));
-      r = c.r;
-      g = c.g;
-      b = c.b;
-   }
-
-   void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx)
-   {
-      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
-      const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
-      uint r, g, b;
-      unpack_color5(r, g, b, packed_color5, true);
-
-      const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
-      const int y0 = pInten_modifer_table[0];
-      pDst[0].set(ir + y0, ig + y0, ib + y0);
-
-      const int y1 = pInten_modifer_table[1];
-      pDst[1].set(ir + y1, ig + y1, ib + y1);
-
-      const int y2 = pInten_modifer_table[2];
-      pDst[2].set(ir + y2, ig + y2, ib + y2);
-
-      const int y3 = pInten_modifer_table[3];
-      pDst[3].set(ir + y3, ig + y3, ib + y3);
-   }
-   
-   bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx)
-   {
-      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
-      const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
-      uint r, g, b;
-      bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true);
-
-      const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
-      const int y0 = pInten_modifer_table[0];
-      pDst[0].set(ir + y0, ig + y0, ib + y0);
-
-      const int y1 = pInten_modifer_table[1];
-      pDst[1].set(ir + y1, ig + y1, ib + y1);
-
-      const int y2 = pInten_modifer_table[2];
-      pDst[2].set(ir + y2, ig + y2, ib + y2);
-
-      const int y3 = pInten_modifer_table[3];
-      pDst[3].set(ir + y3, ig + y3, ib + y3);
-
-      return success;
-   }
-   
-   void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx)
-   {
-      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
-      const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
-      uint r, g, b;
-      unpack_color4(r, g, b, packed_color4, true);
-      
-      const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
-      const int y0 = pInten_modifer_table[0];
-      pDst[0].set(ir + y0, ig + y0, ib + y0);
-      
-      const int y1 = pInten_modifer_table[1];
-      pDst[1].set(ir + y1, ig + y1, ib + y1);
-
-      const int y2 = pInten_modifer_table[2];
-      pDst[2].set(ir + y2, ig + y2, ib + y2);
-
-      const int y3 = pInten_modifer_table[3];
-      pDst[3].set(ir + y3, ig + y3, ib + y3);
-   }
-      
-   bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha)
-   {
-      color_quad_u8* pDst = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba);
-      const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block);
-
-      const bool diff_flag = block.get_diff_bit();
-      const bool flip_flag = block.get_flip_bit();
-      const uint table_index0 = block.get_inten_table(0);
-      const uint table_index1 = block.get_inten_table(1);
-
-      color_quad_u8 subblock_colors0[4];
-      color_quad_u8 subblock_colors1[4];
-      bool success = true;
-
-      if (diff_flag)
-      {
-         const uint16 base_color5 = block.get_base5_color();
-         const uint16 delta_color3 = block.get_delta3_color();
-         etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0);
-            
-         if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1))
-            success = false;
-      }
-      else
-      {
-         const uint16 base_color4_0 = block.get_base4_color(0);
-         etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0);
-
-         const uint16 base_color4_1 = block.get_base4_color(1);
-         etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1);
-      }
-
-      if (preserve_alpha)
-      {
-         if (flip_flag)
-         {
-            for (uint y = 0; y < 2; y++)
-            {
-               pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
-               pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
-               pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]);
-               pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]);
-               pDst += 4;
-            }
-
-            for (uint y = 2; y < 4; y++)
-            {
-               pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]);
-               pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]);
-               pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
-               pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
-               pDst += 4;
-            }
-         }
-         else
-         {
-            for (uint y = 0; y < 4; y++)
-            {
-               pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
-               pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
-               pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
-               pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
-               pDst += 4;
-            }
-         }
-      }
-      else 
-      {
-         if (flip_flag)
-         {
-            // 0000
-            // 0000
-            // 1111
-            // 1111
-            for (uint y = 0; y < 2; y++)
-            {
-               pDst[0] = subblock_colors0[block.get_selector(0, y)];
-               pDst[1] = subblock_colors0[block.get_selector(1, y)];
-               pDst[2] = subblock_colors0[block.get_selector(2, y)];
-               pDst[3] = subblock_colors0[block.get_selector(3, y)];
-               pDst += 4;
-            }
-
-            for (uint y = 2; y < 4; y++)
-            {
-               pDst[0] = subblock_colors1[block.get_selector(0, y)];
-               pDst[1] = subblock_colors1[block.get_selector(1, y)];
-               pDst[2] = subblock_colors1[block.get_selector(2, y)];
-               pDst[3] = subblock_colors1[block.get_selector(3, y)];
-               pDst += 4;
-            }
-         }
-         else
-         {
-            // 0011
-            // 0011
-            // 0011
-            // 0011
-            for (uint y = 0; y < 4; y++)
-            {
-               pDst[0] = subblock_colors0[block.get_selector(0, y)];
-               pDst[1] = subblock_colors0[block.get_selector(1, y)];
-               pDst[2] = subblock_colors1[block.get_selector(2, y)];
-               pDst[3] = subblock_colors1[block.get_selector(3, y)];
-               pDst += 4;
-            }
-         }
-      }
-      
-      return success;
-   }
-
-   struct etc1_solution_coordinates
-   {
-      inline etc1_solution_coordinates() :
-      m_unscaled_color(0, 0, 0, 0),
-         m_inten_table(0),
-         m_color4(false)
-      {
-      }
-
-      inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : 
-      m_unscaled_color(r, g, b, 255),
-         m_inten_table(inten_table),
-         m_color4(color4)
-      {
-      }
-
-      inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : 
-      m_unscaled_color(c),
-         m_inten_table(inten_table),
-         m_color4(color4)
-      {
-      }
-
-      inline etc1_solution_coordinates(const etc1_solution_coordinates& other)
-      {
-         *this = other;
-      }
-
-      inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs)
-      {
-         m_unscaled_color = rhs.m_unscaled_color;
-         m_inten_table = rhs.m_inten_table;
-         m_color4 = rhs.m_color4;
-         return *this;
-      }
-
-      inline void clear()
-      {
-         m_unscaled_color.clear();
-         m_inten_table = 0;
-         m_color4 = false;
-      }
-
-      inline color_quad_u8 get_scaled_color() const
-      {
-         int br, bg, bb;
-         if (m_color4)
-         {
-            br = m_unscaled_color.r | (m_unscaled_color.r << 4);
-            bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
-            bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
-         }
-         else
-         {
-            br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
-            bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
-            bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
-         }
-         return color_quad_u8(br, bg, bb);
-      }
-
-      inline void get_block_colors(color_quad_u8* pBlock_colors)
-      {
-         int br, bg, bb;
-         if (m_color4)
-         {
-            br = m_unscaled_color.r | (m_unscaled_color.r << 4);
-            bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
-            bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
-         }
-         else
-         {
-            br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
-            bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
-            bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
-         }
-         const int* pInten_table = g_etc1_inten_tables[m_inten_table];
-         pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]);
-         pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]);
-         pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]);
-         pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]);
-      }
-
-      color_quad_u8 m_unscaled_color;
-      uint m_inten_table;
-      bool m_color4;
-   };
-
-   class etc1_optimizer
-   {
-      etc1_optimizer(const etc1_optimizer&);
-      etc1_optimizer& operator= (const etc1_optimizer&);
-
-   public:
-      etc1_optimizer()
-      {
-         clear();
-      }
-
-      void clear()
-      {
-         m_pParams = NULL;
-         m_pResult = NULL;
-         m_pSorted_luma = NULL;
-         m_pSorted_luma_indices = NULL;
-      }
-
-      struct params : etc1_pack_params
-      {
-         params()
-         {
-            clear();
-         }
-
-         params(const etc1_pack_params& base_params) : 
-         etc1_pack_params(base_params)
-         {
-            clear_optimizer_params();
-         }
-
-         void clear()
-         {
-            etc1_pack_params::clear();
-            clear_optimizer_params();
-         }
-
-         void clear_optimizer_params()
-         {
-            m_num_src_pixels = 0;
-            m_pSrc_pixels = 0;
-
-            m_use_color4 = false;
-            static const int s_default_scan_delta[] = { 0 };
-            m_pScan_deltas = s_default_scan_delta;
-            m_scan_delta_size = 1;
-
-            m_base_color5.clear();
-            m_constrain_against_base_color5 = false;
-         }
-
-         uint m_num_src_pixels;
-         const color_quad_u8* m_pSrc_pixels;
-
-         bool m_use_color4;
-         const int* m_pScan_deltas;
-         uint m_scan_delta_size;
-
-         color_quad_u8 m_base_color5;
-         bool m_constrain_against_base_color5;
-      };
-
-      struct results
-      {
-         uint64 m_error;
-         color_quad_u8 m_block_color_unscaled;
-         uint m_block_inten_table;
-         uint m_n;
-         uint8* m_pSelectors;
-         bool m_block_color4;
-
-         inline results& operator= (const results& rhs)
-         {
-            m_block_color_unscaled = rhs.m_block_color_unscaled;
-            m_block_color4 = rhs.m_block_color4;
-            m_block_inten_table = rhs.m_block_inten_table;
-            m_error = rhs.m_error;
-            RG_ETC1_ASSERT(m_n == rhs.m_n);
-            memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n);
-            return *this;
-         }
-      };
-
-      void init(const params& params, results& result);
-      bool compute();
-
-   private:      
-      struct potential_solution
-      {
-         potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false)
-         {
-         }
-
-         etc1_solution_coordinates  m_coords;
-         uint8                      m_selectors[8];
-         uint64                     m_error;
-         bool                       m_valid;
-
-         void clear()
-         {
-            m_coords.clear();
-            m_error = cUINT64_MAX;
-            m_valid = false;
-         }
-      };
-
-      const params* m_pParams;
-      results* m_pResult;
-
-      int m_limit;
-
-      vec3F m_avg_color;
-      int m_br, m_bg, m_bb;
-      uint16 m_luma[8];
-      uint32 m_sorted_luma[2][8];
-      const uint32* m_pSorted_luma_indices;
-      uint32* m_pSorted_luma;
-
-      uint8 m_selectors[8];
-      uint8 m_best_selectors[8];
-
-      potential_solution m_best_solution;
-      potential_solution m_trial_solution;
-      uint8 m_temp_selectors[8];
-
-      bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
-      bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
-   };
-      
-   bool etc1_optimizer::compute()
-   {
-      const uint n = m_pParams->m_num_src_pixels;
-      const int scan_delta_size = m_pParams->m_scan_delta_size;
-      
-      // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color.
-      // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index.
-      for (int zdi = 0; zdi < scan_delta_size; zdi++)
-      {
-         const int zd = m_pParams->m_pScan_deltas[zdi];
-         const int mbb = m_bb + zd;
-         if (mbb < 0) continue; else if (mbb > m_limit) break;
-         
-         for (int ydi = 0; ydi < scan_delta_size; ydi++)
-         {
-            const int yd = m_pParams->m_pScan_deltas[ydi];
-            const int mbg = m_bg + yd;
-            if (mbg < 0) continue; else if (mbg > m_limit) break;
-
-            for (int xdi = 0; xdi < scan_delta_size; xdi++)
-            {
-               const int xd = m_pParams->m_pScan_deltas[xdi];
-               const int mbr = m_br + xd;
-               if (mbr < 0) continue; else if (mbr > m_limit) break;
-      
-               etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4);
-               if (m_pParams->m_quality == cHighQuality)
-               {
-                  if (!evaluate_solution(coords, m_trial_solution, &m_best_solution))
-                     continue;
-               }
-               else
-               {
-                  if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution))
-                     continue;
-               }
-               
-               // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index.
-               // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors:
-               // The goal is:
-               // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0
-               // Rearranging this:
-               // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0
-               // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0
-               // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0
-               // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0
-               // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0
-               // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4
-               // So what this means:
-               // optimal_block_color = avg_input - avg_inten_delta
-               // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta.
-               // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula.
-               // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping.
-
-               const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2);
-               for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++)
-               {
-                  const uint8* pSelectors = m_best_solution.m_selectors;
-                  const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table];
-
-                  int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0;
-                  const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color());
-                  for (uint r = 0; r < n; r++)
-                  {
-                     const uint s = *pSelectors++;
-                     const int yd = pInten_table[s];
-                     // Compute actual delta being applied to each pixel, taking into account clamping.
-                     delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r;
-                     delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g;
-                     delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b;
-                  }
-                  if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b))
-                     break;
-                  const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n;
-                  const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n;
-                  const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n;
-                  const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit);
-                  const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit);
-                  const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit);
-                  
-                  bool skip = false;
-                  
-                  if ((mbr == br1) && (mbg == bg1) && (mbb == bb1))
-                     skip = true;
-                  else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b))
-                     skip = true;
-                  else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1))
-                     skip = true;
-
-                  if (skip)
-                     break;
-
-                  etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4);
-                  if (m_pParams->m_quality == cHighQuality)
-                  {
-                     if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) 
-                        break;
-                  }
-                  else
-                  {
-                     if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution))
-                        break;
-                  }
-
-               }  // refinement_trial
-
-            } // xdi
-         } // ydi
-      } // zdi
-
-      if (!m_best_solution.m_valid)
-      {
-         m_pResult->m_error = cUINT32_MAX;
-         return false;
-      }
-      
-      const uint8* pSelectors = m_best_solution.m_selectors;
-
-#ifdef RG_ETC1_BUILD_DEBUG
-      {
-         color_quad_u8 block_colors[4];
-         m_best_solution.m_coords.get_block_colors(block_colors);
-
-         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
-         uint64 actual_error = 0;
-         for (uint i = 0; i < n; i++)
-            actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]);
-         
-         RG_ETC1_ASSERT(actual_error == m_best_solution.m_error);
-      }
-#endif      
-      
-      m_pResult->m_error = m_best_solution.m_error;
-
-      m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color;
-      m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4;
-      
-      m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table;
-      memcpy(m_pResult->m_pSelectors, pSelectors, n);
-      m_pResult->m_n = n;
-
-      return true;
-   }
-
-   void etc1_optimizer::init(const params& p, results& r)
-   {
-      // This version is hardcoded for 8 pixel subblocks.
-      RG_ETC1_ASSERT(p.m_num_src_pixels == 8);
-      
-      m_pParams = &p;
-      m_pResult = &r;
-                  
-      const uint n = 8;
-      
-      m_limit = m_pParams->m_use_color4 ? 15 : 31;
-
-      vec3F avg_color(0.0f);
-
-      for (uint i = 0; i < n; i++)
-      {
-         const color_quad_u8& c = m_pParams->m_pSrc_pixels[i];
-         const vec3F fc(c.r, c.g, c.b);
-
-         avg_color += fc;
-
-         m_luma[i] = static_cast<uint16>(c.r + c.g + c.b);
-         m_sorted_luma[0][i] = i;
-      }
-      avg_color *= (1.0f / static_cast<float>(n));
-      m_avg_color = avg_color;
-
-      m_br = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit);
-      m_bg = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit);
-      m_bb = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit);
-
-      if (m_pParams->m_quality <= cMediumQuality)
-      {
-         m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false);
-         m_pSorted_luma = m_sorted_luma[0];
-         if (m_pSorted_luma_indices == m_sorted_luma[0])
-            m_pSorted_luma = m_sorted_luma[1];
-      
-         for (uint i = 0; i < n; i++)
-            m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]];
-      }
-      
-      m_best_solution.m_coords.clear();
-      m_best_solution.m_valid = false;
-      m_best_solution.m_error = cUINT64_MAX;
-   }
-
-   bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
-   {
-      trial_solution.m_valid = false;
-
-      if (m_pParams->m_constrain_against_base_color5)
-      {
-         const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
-         const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
-         const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
-
-         if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
-            return false;
-      }
-
-      const color_quad_u8 base_color(coords.get_scaled_color());
-      
-      const uint n = 8;
-            
-      trial_solution.m_error = cUINT64_MAX;
-            
-      for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++)
-      {
-         const int* pInten_table = g_etc1_inten_tables[inten_table];
-
-         color_quad_u8 block_colors[4];
-         for (uint s = 0; s < 4; s++)
-         {
-            const int yd = pInten_table[s];
-            block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
-         }
-         
-         uint64 total_error = 0;
-         
-         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
-         for (uint c = 0; c < n; c++)
-         {
-            const color_quad_u8& src_pixel = *pSrc_pixels++;
-            
-            uint best_selector_index = 0;
-            uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b);
-
-            uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b);
-            if (trial_error < best_error)
-            {
-               best_error = trial_error;
-               best_selector_index = 1;
-            }
-
-            trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b);
-            if (trial_error < best_error)
-            {
-               best_error = trial_error;
-               best_selector_index = 2;
-            }
-
-            trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b);
-            if (trial_error < best_error)
-            {
-               best_error = trial_error;
-               best_selector_index = 3;
-            }
-
-            m_temp_selectors[c] = static_cast<uint8>(best_selector_index);
-
-            total_error += best_error;
-            if (total_error >= trial_solution.m_error)
-               break;
-         }
-         
-         if (total_error < trial_solution.m_error)
-         {
-            trial_solution.m_error = total_error;
-            trial_solution.m_coords.m_inten_table = inten_table;
-            memcpy(trial_solution.m_selectors, m_temp_selectors, 8);
-            trial_solution.m_valid = true;
-         }
-      }
-      trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
-      trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
-
-      bool success = false;
-      if (pBest_solution)
-      {
-         if (trial_solution.m_error < pBest_solution->m_error)
-         {
-            *pBest_solution = trial_solution;
-            success = true;
-         }
-      }
-
-      return success;
-   }
-
-   bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
-   {
-      if (m_pParams->m_constrain_against_base_color5)
-      {
-         const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
-         const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
-         const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
-
-         if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
-         {
-            trial_solution.m_valid = false;
-            return false;
-         }
-      }
-
-      const color_quad_u8 base_color(coords.get_scaled_color());
-
-      const uint n = 8;
-      
-      trial_solution.m_error = cUINT64_MAX;
-
-      for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table)
-      {
-         const int* pInten_table = g_etc1_inten_tables[inten_table];
-
-         uint block_inten[4];
-         color_quad_u8 block_colors[4];
-         for (uint s = 0; s < 4; s++)
-         {
-            const int yd = pInten_table[s];
-            color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
-            block_colors[s] = block_color;
-            block_inten[s] = block_color.r + block_color.g + block_color.b;
-         }
-
-         // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors.
-         // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast.
-         // 0   1   2   3
-         //   01  12  23
-         const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] };
-
-         uint64 total_error = 0;
-         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
-         if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0])
-         {
-            if (block_inten[0] > m_pSorted_luma[n - 1])
-            {
-           const uint min_error = intabs(block_inten[0] - m_pSorted_luma[n - 1]);
-               if (min_error >= trial_solution.m_error)
-                  continue;
-            }
-
-            memset(&m_temp_selectors[0], 0, n);
-
-            for (uint c = 0; c < n; c++)
-               total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]);
-         }
-         else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2])
-         {
-            if (m_pSorted_luma[0] > block_inten[3])
-            {
-           const uint min_error = intabs(m_pSorted_luma[0] - block_inten[3]);
-               if (min_error >= trial_solution.m_error)
-                  continue;
-            }
-
-            memset(&m_temp_selectors[0], 3, n);
-
-            for (uint c = 0; c < n; c++)
-               total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]);
-         }
-         else
-         {
-            uint cur_selector = 0, c;
-            for (c = 0; c < n; c++)
-            {
-               const uint y = m_pSorted_luma[c];
-               while ((y * 2) >= block_inten_midpoints[cur_selector])
-                  if (++cur_selector > 2)
-                     goto done;
-               const uint sorted_pixel_index = m_pSorted_luma_indices[c];
-               m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector);
-               total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
-            }
-done:
-            while (c < n)
-            {
-               const uint sorted_pixel_index = m_pSorted_luma_indices[c];
-               m_temp_selectors[sorted_pixel_index] = 3;
-               total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
-               ++c;
-            }
-         }
-
-         if (total_error < trial_solution.m_error)
-         {
-            trial_solution.m_error = total_error;
-            trial_solution.m_coords.m_inten_table = inten_table;
-            memcpy(trial_solution.m_selectors, m_temp_selectors, n);
-            trial_solution.m_valid = true;
-            if (!total_error)
-               break;
-         }
-      }
-      trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
-      trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
-      
-      bool success = false;
-      if (pBest_solution)
-      {
-         if (trial_solution.m_error < pBest_solution->m_error)
-         {
-            *pBest_solution = trial_solution;
-            success = true;
-         }
-      }
-
-      return success;
-   }
-         
-   static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c)
-   {
-      const uint limit = diff ? 32 : 16; limit;
-      RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit));
-      int c;
-      if (diff)
-         c = (packed_c >> 2) | (packed_c << 3);
-      else 
-         c = packed_c | (packed_c << 4);
-      c += g_etc1_inten_tables[inten][selector];
-      c = rg_etc1::clamp<int>(c, 0, 255);
-      return c;
-   }
-
-   static inline int mul_8bit(int a, int b) { int t = a*b + 128; return (t + (t >> 8)) >> 8; }
-
-   void pack_etc1_block_init()
-   {
-      for (uint diff = 0; diff < 2; diff++)
-      {
-         const uint limit = diff ? 32 : 16;
-
-         for (uint inten = 0; inten < 8; inten++)
-         {
-            for (uint selector = 0; selector < 4; selector++)
-            {
-               const uint inverse_table_index = diff + (inten << 1) + (selector << 4);
-               for (uint color = 0; color < 256; color++)
-               {
-                  uint best_error = cUINT32_MAX, best_packed_c = 0;
-                  for (uint packed_c = 0; packed_c < limit; packed_c++)
-                  {
-                     int v = etc1_decode_value(diff, inten, selector, packed_c);
-                     uint err = labs(v - static_cast<int>(color));
-		     //printf("err: %d - %u = %u\n",v,color,err);
-                     if (err < best_error)
-                     {
-                        best_error = err;
-                        best_packed_c = packed_c;
-                        if (!best_error) 
-                           break;
-                     }
-                  }
-                  RG_ETC1_ASSERT(best_error <= 255);
-                  g_etc1_inverse_lookup[inverse_table_index][color] = static_cast<uint16>(best_packed_c | (best_error << 8));
-               }
-            }
-         }
-      }
-      
-      uint expand5[32];
-      for(int i = 0; i < 32; i++)
-         expand5[i] = (i << 3) | (i >> 2);
-
-      for(int i = 0; i < 256 + 16; i++)
-      {
-         int v = clamp<int>(i - 8, 0, 255);
-         g_quant5_tab[i] = static_cast<uint8>(expand5[mul_8bit(v,31)]);
-      }
-   }
-
-   // Packs solid color blocks efficiently using a set of small precomputed tables.
-   // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
-   static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params)
-   {
-      pack_params;
-      RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
-            
-      static uint s_next_comp[4] = { 1, 2, 0, 1 };
-            
-      uint best_error = cUINT32_MAX, best_i = 0;
-      int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
-
-      // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
-      for (uint i = 0; i < 3; i++)
-      {
-         const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
-
-         const int delta_range = 1;
-         for (int delta = -delta_range; delta <= delta_range; delta++)
-         {
-            const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
-
-            const uint16* pTable;
-            if (!c_plus_delta)
-               pTable = g_color8_to_etc_block_config_0_255[0];
-            else if (c_plus_delta == 255)
-               pTable = g_color8_to_etc_block_config_0_255[1];
-            else
-               pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
-
-            do
-            {
-               const uint x = *pTable++;
-
-#ifdef RG_ETC1_BUILD_DEBUG
-               const uint diff = x & 1;
-               const uint inten = (x >> 1) & 7;
-               const uint selector = (x >> 4) & 3;
-               const uint p0 = (x >> 8) & 255;
-               RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
-#endif
-
-               const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
-               uint16 p1 = pInverse_table[c1];
-               uint16 p2 = pInverse_table[c2];
-               const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
-               if (trial_error < best_error)
-               {
-                  best_error = trial_error;
-                  best_x = x;
-                  best_packed_c1 = p1 & 0xFF;
-                  best_packed_c2 = p2 & 0xFF;
-                  best_i = i;
-                  if (!best_error)
-                     goto found_perfect_match;
-               }
-            } while (*pTable != 0xFFFF);
-         }
-      }
-found_perfect_match:
-
-      const uint diff = best_x & 1;
-      const uint inten = (best_x >> 1) & 7;
-
-      block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1));
-                        
-      const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3];
-      *reinterpret_cast<uint16*>(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0;
-      *reinterpret_cast<uint16*>(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0;
-
-      const uint best_packed_c0 = (best_x >> 8) & 255;
-      if (diff)
-      {
-         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3);
-         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3);
-         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3);
-      }
-      else
-      {
-         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4));
-         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4));
-         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4));
-      }
-
-      return best_error;
-   }
-      
-   static uint pack_etc1_block_solid_color_constrained(
-      etc1_optimizer::results& results, 
-      uint num_colors, const uint8* pColor, 
-      etc1_pack_params& pack_params, 
-      bool use_diff,
-      const color_quad_u8* pBase_color5_unscaled)
-   {
-      RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
-
-      pack_params;
-      static uint s_next_comp[4] = { 1, 2, 0, 1 };
-
-      uint best_error = cUINT32_MAX, best_i = 0;
-      int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
-
-      // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
-      for (uint i = 0; i < 3; i++)
-      {
-         const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
-
-         const int delta_range = 1;
-         for (int delta = -delta_range; delta <= delta_range; delta++)
-         {
-            const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
-
-            const uint16* pTable;
-            if (!c_plus_delta)
-               pTable = g_color8_to_etc_block_config_0_255[0];
-            else if (c_plus_delta == 255)
-               pTable = g_color8_to_etc_block_config_0_255[1];
-            else
-               pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
-
-            do
-            {
-               const uint x = *pTable++;
-               const uint diff = x & 1;
-               if (static_cast<uint>(use_diff) != diff)
-               {
-                  if (*pTable == 0xFFFF)
-                     break;
-                  continue;
-               }
-
-               if ((diff) && (pBase_color5_unscaled))
-               {
-                  const int p0 = (x >> 8) & 255;
-                  int delta = p0 - static_cast<int>(pBase_color5_unscaled->c[i]);
-                  if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax))
-                  {
-                     if (*pTable == 0xFFFF)
-                        break;
-                     continue;
-                  }
-               }
-
-#ifdef RG_ETC1_BUILD_DEBUG
-               {
-                  const uint inten = (x >> 1) & 7;
-                  const uint selector = (x >> 4) & 3;
-                  const uint p0 = (x >> 8) & 255;
-                  RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
-               }
-#endif
-
-               const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
-               uint16 p1 = pInverse_table[c1];
-               uint16 p2 = pInverse_table[c2];
-
-               if ((diff) && (pBase_color5_unscaled))
-               {
-                  int delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i]]);
-                  int delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i + 1]]);
-                  if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax))
-                  {
-                     if (*pTable == 0xFFFF)
-                        break;
-                     continue;
-                  }
-               }
-
-               const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
-               if (trial_error < best_error)
-               {
-                  best_error = trial_error;
-                  best_x = x;
-                  best_packed_c1 = p1 & 0xFF;
-                  best_packed_c2 = p2 & 0xFF;
-                  best_i = i;
-                  if (!best_error)
-                     goto found_perfect_match;
-               }
-            } while (*pTable != 0xFFFF);
-         }
-      }
-found_perfect_match:
-
-      if (best_error == cUINT32_MAX)
-         return best_error;
-
-      best_error *= num_colors;
-
-      results.m_n = num_colors;
-      results.m_block_color4 = !(best_x & 1);
-      results.m_block_inten_table = (best_x >> 1) & 7;
-      memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors);
-
-      const uint best_packed_c0 = (best_x >> 8) & 255;
-      results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0);
-      results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1);
-      results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2);
-      results.m_error = best_error;
-      
-      return best_error;
-   }
-
-   // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555.
-   static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block)
-   {
-      int err[8],*ep1 = err,*ep2 = err+4;
-      uint8 *quant = g_quant5_tab+8;
-
-      memset(dest, 0xFF, sizeof(color_quad_u8)*16);
-
-      // process channels seperately
-      for(int ch=0;ch<3;ch++)
-      {
-         uint8* bp = (uint8*)block;
-         uint8* dp = (uint8*)dest;
-
-         bp += ch; dp += ch;
-
-         memset(err,0, sizeof(err));
-         for(int y = 0; y < 4; y++)
-         {
-            // pixel 0
-            dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
-            ep1[0] = bp[ 0] - dp[ 0];
-
-            // pixel 1
-            dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
-            ep1[1] = bp[ 4] - dp[ 4];
-
-            // pixel 2
-            dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
-            ep1[2] = bp[ 8] - dp[ 8];
-
-            // pixel 3
-            dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
-            ep1[3] = bp[12] - dp[12];
-
-            // advance to next line
-            int* tmp = ep1; ep1 = ep2; ep2 = tmp;
-            bp += 16;
-            dp += 16;
-         }
-      }
-   }
-
-   unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params)
-   {
-      const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba);
-      etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block);
-
-#ifdef RG_ETC1_BUILD_DEBUG
-      // Ensure all alpha values are 0xFF.
-      for (uint i = 0; i < 16; i++)
-      {
-         RG_ETC1_ASSERT(pSrc_pixels[i].a == 255);
-      }
-#endif
-
-      color_quad_u8 src_pixel0(pSrc_pixels[0]);
-
-      // Check for solid block.
-      const uint32 first_pixel_u32 = pSrc_pixels->m_u32;
-      int r;
-      for (r = 15; r >= 1; --r)
-         if (pSrc_pixels[r].m_u32 != first_pixel_u32)
-            break;
-      if (!r)
-         return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params));
-      
-      color_quad_u8 dithered_pixels[16];
-      if (pack_params.m_dithering)
-      {
-         dither_block_555(dithered_pixels, pSrc_pixels);
-         pSrc_pixels = dithered_pixels;
-      }
-
-      etc1_optimizer optimizer;
-
-      uint64 best_error = cUINT64_MAX;
-      uint best_flip = false, best_use_color4 = false;
-      
-      uint8 best_selectors[2][8];
-      etc1_optimizer::results best_results[2];
-      for (uint i = 0; i < 2; i++)
-      {
-         best_results[i].m_n = 8;
-         best_results[i].m_pSelectors = best_selectors[i];
-      }
-      
-      uint8 selectors[3][8];
-      etc1_optimizer::results results[3];
-      
-      for (uint i = 0; i < 3; i++)
-      {
-         results[i].m_n = 8;
-         results[i].m_pSelectors = selectors[i];
-      }
-            
-      color_quad_u8 subblock_pixels[8];
-
-      etc1_optimizer::params params(pack_params);
-      params.m_num_src_pixels = 8;
-      params.m_pSrc_pixels = subblock_pixels;
-
-      for (uint flip = 0; flip < 2; flip++)
-      {
-         for (uint use_color4 = 0; use_color4 < 2; use_color4++)
-         {
-            uint64 trial_error = 0;
-
-            uint subblock;
-            for (subblock = 0; subblock < 2; subblock++)
-            {
-               if (flip)
-                  memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8);
-               else
-               {
-                  const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2;
-                  subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12];
-                  subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13];
-               }
-
-               results[2].m_error = cUINT64_MAX;
-               if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4)))
-               {
-                  const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;
-                  for (r = 7; r >= 1; --r)
-                     if (subblock_pixels[r].m_u32 != subblock_pixel0_u32)
-                        break;
-                  if (!r)
-                  {
-                     pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL);
-                  }
-               }
-
-               params.m_use_color4 = (use_color4 != 0);
-               params.m_constrain_against_base_color5 = false;
-
-               if ((!use_color4) && (subblock))
-               {
-                  params.m_constrain_against_base_color5 = true;
-                  params.m_base_color5 = results[0].m_block_color_unscaled;
-               }
-                              
-               if (params.m_quality == cHighQuality)
-               {
-                  static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
-                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4);
-                  params.m_pScan_deltas = s_scan_delta_0_to_4;
-               }
-               else if (params.m_quality == cMediumQuality)
-               {
-                  static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
-                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1);
-                  params.m_pScan_deltas = s_scan_delta_0_to_1;
-               }
-               else
-               {
-                  static const int s_scan_delta_0[] = { 0 };
-                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0);
-                  params.m_pScan_deltas = s_scan_delta_0;
-               }
-               
-               optimizer.init(params, results[subblock]);
-               if (!optimizer.compute())
-                  break;
-                              
-               if (params.m_quality >= cMediumQuality)
-               {
-                  // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions.
-                  const uint refinement_error_thresh0 = 3000;
-                  const uint refinement_error_thresh1 = 6000;
-                  if (results[subblock].m_error > refinement_error_thresh0)
-                  {
-                     if (params.m_quality == cMediumQuality)
-                     {
-                        static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 };
-                        params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3);
-                        params.m_pScan_deltas = s_scan_delta_2_to_3;
-                     }
-                     else
-                     {
-                        static const int s_scan_delta_5_to_5[] = { -5, 5 };
-                        static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 };
-                        if (results[subblock].m_error > refinement_error_thresh1)
-                        {
-                           params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8);
-                           params.m_pScan_deltas = s_scan_delta_5_to_8;
-                        }
-                        else
-                        {
-                           params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5);
-                           params.m_pScan_deltas = s_scan_delta_5_to_5;
-                        }
-                     }
-
-                     if (!optimizer.compute())
-                        break;
-                  }
-
-                  if (results[2].m_error < results[subblock].m_error)
-                     results[subblock] = results[2];
-               }
-                            
-               trial_error += results[subblock].m_error;
-               if (trial_error >= best_error)
-                  break;
-            }
-
-            if (subblock < 2)
-               continue;
-
-            best_error = trial_error;
-            best_results[0] = results[0];
-            best_results[1] = results[1];
-            best_flip = flip;
-            best_use_color4 = use_color4;
-            
-         } // use_color4
-
-      } // flip
-
-      int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r;
-      int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g;
-      int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b;
-      RG_ETC1_ASSERT(best_use_color4 || ((rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)));
-           
-      if (best_use_color4)
-      {
-         dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4));
-         dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4));
-         dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4));
-      }
-      else
-      {
-         if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr);
-         if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg);
-         if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db);
-      }
-      
-      dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip );
-      
-      uint selector0 = 0, selector1 = 0;
-      if (best_flip)
-      {
-         // flipped:
-         // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },               
-         // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } 
-         //
-         // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
-         // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
-         const uint8* pSelectors0 = best_results[0].m_pSelectors;
-         const uint8* pSelectors1 = best_results[1].m_pSelectors;
-         for (int x = 3; x >= 0; --x)
-         {
-            uint b;
-            b = g_selector_index_to_etc1[pSelectors1[4 + x]];
-            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
-            b = g_selector_index_to_etc1[pSelectors1[x]];
-            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
-            b = g_selector_index_to_etc1[pSelectors0[4 + x]];
-            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
-            b = g_selector_index_to_etc1[pSelectors0[x]];
-            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-         }
-      }
-      else
-      {
-         // non-flipped:
-         // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
-         // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
-         //
-         // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
-         // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
-         for (int subblock = 1; subblock >= 0; --subblock)
-         {
-            const uint8* pSelectors = best_results[subblock].m_pSelectors + 4;
-            for (uint i = 0; i < 2; i++)
-            {
-               uint b;
-               b = g_selector_index_to_etc1[pSelectors[3]];
-               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
-               b = g_selector_index_to_etc1[pSelectors[2]];
-               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
-               b = g_selector_index_to_etc1[pSelectors[1]];
-               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
-               b = g_selector_index_to_etc1[pSelectors[0]];
-               selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1);
-
-               pSelectors -= 4;
-            }
-         }
-      }
-                  
-      dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF);
-      dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF);
-
-      return static_cast<unsigned int>(best_error);
-   }
-
-} // namespace rg_etc1
+// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
+// Please see ZLIB license at the end of rg_etc1.h.
+//
+// For more information Ericsson Texture Compression (ETC/ETC1), see:
+// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
+//
+// v1.03 - 5/12/13 - Initial public release
+#include "rg_etc1.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+//#include <stdio.h>
+#include <math.h>
+#include <stdio.h>
+#pragma warning (disable: 4201) //  nonstandard extension used : nameless struct/union
+
+#if defined(_DEBUG) || defined(DEBUG)
+#define RG_ETC1_BUILD_DEBUG
+#endif
+
+#define RG_ETC1_ASSERT assert
+
+namespace rg_etc1
+{
+
+   inline long labs(long val) {
+        return val < 0 ? -val : val;
+   }
+
+   inline int intabs(int val) {
+
+       return val<0?-val:val;
+   }
+
+   typedef unsigned char uint8;
+   typedef unsigned short uint16;
+   typedef unsigned int uint;
+   typedef unsigned int uint32;
+   typedef long long int64;
+   typedef unsigned long long uint64;
+
+   const uint32 cUINT32_MAX = 0xFFFFFFFFU;
+   const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64;
+   
+   template<typename T> inline T minimum(T a, T b) { return (a < b) ? a : b; }
+   template<typename T> inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); }
+   template<typename T> inline T maximum(T a, T b) { return (a > b) ? a : b; }
+   template<typename T> inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); }
+   template<typename T> inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); }
+   template<typename T> inline T square(T value) { return value * value; }
+   template<typename T> inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); }
+   template<typename T> inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); }
+
+   template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]);   
+
+#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X)))
+
+   enum eNoClamp { cNoClamp };
+
+   struct color_quad_u8
+   {
+      static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast<int>(v) >> 31)) & 0xFF; return v; }
+
+      struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; };
+
+   public:
+      typedef unsigned char component_t;
+      typedef int parameter_t;
+
+      enum { cNumComps = 4 };
+
+      union
+      {
+         struct
+         {
+            component_t r;
+            component_t g;
+            component_t b;
+            component_t a;
+         };
+
+         component_t c[cNumComps];
+
+         uint32 m_u32;
+      };
+
+      inline color_quad_u8()
+      {
+      }
+
+      inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32)
+      {
+      }
+
+      explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         set(y, alpha);
+      }
+
+      inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+      {
+         set(red, green, blue, alpha);
+      }
+
+      explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         set_noclamp_y_alpha(y, alpha);
+      }
+
+      inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+      {
+         set_noclamp_rgba(red, green, blue, alpha);
+      }
+
+      inline void clear()
+      {
+         m_u32 = 0;
+      }
+
+      inline color_quad_u8& operator= (const color_quad_u8& other)
+      {
+         m_u32 = other.m_u32;
+         return *this;
+      }
+
+      inline color_quad_u8& set_rgb(const color_quad_u8& other)
+      {
+         r = other.r;
+         g = other.g;
+         b = other.b;
+         return *this;
+      }
+
+      inline color_quad_u8& operator= (parameter_t y)
+      {
+         set(y, component_traits::cMax);
+         return *this;
+      }
+
+      inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         y = clamp(y);
+         alpha = clamp(alpha);
+         r = static_cast<component_t>(y);
+         g = static_cast<component_t>(y);
+         b = static_cast<component_t>(y);
+         a = static_cast<component_t>(alpha);
+         return *this;
+      }
+
+      inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax)
+      {
+         RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
+
+         r = static_cast<component_t>(y);
+         g = static_cast<component_t>(y);
+         b = static_cast<component_t>(y);
+         a = static_cast<component_t>(alpha);
+         return *this;
+      }
+
+      inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
+      {
+         r = static_cast<component_t>(clamp(red));
+         g = static_cast<component_t>(clamp(green));
+         b = static_cast<component_t>(clamp(blue));
+         a = static_cast<component_t>(clamp(alpha));
+         return *this;
+      }
+
+      inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha)
+      {
+         RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
+
+         r = static_cast<component_t>(red);
+         g = static_cast<component_t>(green);
+         b = static_cast<component_t>(blue);
+         a = static_cast<component_t>(alpha);
+         return *this;
+      }
+
+      inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue)
+      {
+         RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
+         RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
+
+         r = static_cast<component_t>(red);
+         g = static_cast<component_t>(green);
+         b = static_cast<component_t>(blue);
+         return *this;
+      }
+
+      static inline parameter_t get_min_comp() { return component_traits::cMin; }
+      static inline parameter_t get_max_comp() { return component_traits::cMax; }
+      static inline bool get_comps_are_signed() { return component_traits::cSigned; }
+
+      inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
+      inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
+
+      inline color_quad_u8& set_component(uint i, parameter_t f)
+      {
+         RG_ETC1_ASSERT(i < cNumComps);
+
+         c[i] = static_cast<component_t>(clamp(f));
+
+         return *this;
+      }
+
+      inline color_quad_u8& set_grayscale(parameter_t l)
+      {
+         component_t x = static_cast<component_t>(clamp(l));
+         c[0] = x;
+         c[1] = x;
+         c[2] = x;
+         return *this;
+      }
+
+      inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h)
+      {
+         for (uint i = 0; i < cNumComps; i++)
+            c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l[i], h[i]));
+         return *this;
+      }
+
+      inline color_quad_u8& clamp(parameter_t l, parameter_t h)
+      {
+         for (uint i = 0; i < cNumComps; i++)
+            c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l, h));
+         return *this;
+      }
+
+      // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y).
+      inline parameter_t get_luma() const
+      {
+         return static_cast<parameter_t>((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U);
+      }
+
+      // Returns REC 709 luma.
+      inline parameter_t get_luma_rec709() const
+      {
+         return static_cast<parameter_t>((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U);
+      }
+
+      inline uint squared_distance_rgb(const color_quad_u8& c) const
+      {
+         return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b);
+      }
+
+      inline uint squared_distance_rgba(const color_quad_u8& c) const
+      {
+         return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a);
+      }
+
+      inline bool rgb_equals(const color_quad_u8& rhs) const
+      {
+         return (r == rhs.r) && (g == rhs.g) && (b == rhs.b);
+      }
+
+      inline bool operator== (const color_quad_u8& rhs) const
+      {
+         return m_u32 == rhs.m_u32;
+      }
+
+      color_quad_u8& operator+= (const color_quad_u8& other)
+      {
+         for (uint i = 0; i < 4; i++)
+            c[i] = static_cast<component_t>(clamp(c[i] + other.c[i]));
+         return *this;
+      }
+
+      color_quad_u8& operator-= (const color_quad_u8& other)
+      {
+         for (uint i = 0; i < 4; i++)
+            c[i] = static_cast<component_t>(clamp(c[i] - other.c[i]));
+         return *this;
+      }
+
+      friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs)
+      {
+         color_quad_u8 result(lhs);
+         result += rhs;
+         return result;
+      }
+
+      friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs)
+      {
+         color_quad_u8 result(lhs);
+         result -= rhs;
+         return result;
+      }
+   }; // class color_quad_u8
+
+   struct vec3F
+   {
+      float m_s[3];
+      
+      inline vec3F() { }
+      inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; }
+      inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; }
+      
+      inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; }
+
+      inline vec3F& operator += (const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; }
+
+      inline vec3F& operator *= (float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; }
+   };
+     
+   enum etc_constants
+   {
+      cETC1BytesPerBlock = 8U,
+
+      cETC1SelectorBits = 2U,
+      cETC1SelectorValues = 1U << cETC1SelectorBits,
+      cETC1SelectorMask = cETC1SelectorValues - 1U,
+
+      cETC1BlockShift = 2U,
+      cETC1BlockSize = 1U << cETC1BlockShift,
+
+      cETC1LSBSelectorIndicesBitOffset = 0,
+      cETC1MSBSelectorIndicesBitOffset = 16,
+
+      cETC1FlipBitOffset = 32,
+      cETC1DiffBitOffset = 33,
+
+      cETC1IntenModifierNumBits = 3,
+      cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
+      cETC1RightIntenModifierTableBitOffset = 34,
+      cETC1LeftIntenModifierTableBitOffset = 37,
+
+      // Base+Delta encoding (5 bit bases, 3 bit delta)
+      cETC1BaseColorCompNumBits = 5,
+      cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,
+
+      cETC1DeltaColorCompNumBits = 3,
+      cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
+      cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
+
+      cETC1BaseColor5RBitOffset = 59,
+      cETC1BaseColor5GBitOffset = 51,
+      cETC1BaseColor5BBitOffset = 43,
+
+      cETC1DeltaColor3RBitOffset = 56,
+      cETC1DeltaColor3GBitOffset = 48,
+      cETC1DeltaColor3BBitOffset = 40,
+
+      // Absolute (non-delta) encoding (two 4-bit per component bases)
+      cETC1AbsColorCompNumBits = 4,
+      cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,
+
+      cETC1AbsColor4R1BitOffset = 60,
+      cETC1AbsColor4G1BitOffset = 52,
+      cETC1AbsColor4B1BitOffset = 44,
+
+      cETC1AbsColor4R2BitOffset = 56,
+      cETC1AbsColor4G2BitOffset = 48,
+      cETC1AbsColor4B2BitOffset = 40,
+
+      cETC1ColorDeltaMin = -4,
+      cETC1ColorDeltaMax = 3,
+
+      // Delta3:
+      // 0   1   2   3   4   5   6   7
+      // 000 001 010 011 100 101 110 111
+      // 0   1   2   3   -4  -3  -2  -1
+   };
+   
+   static uint8 g_quant5_tab[256+16];
+
+
+   static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = 
+   { 
+      { -8,  -2,   2,   8 }, { -17,  -5,  5,  17 }, { -29,  -9,   9,  29 }, {  -42, -13, 13,  42 }, 
+      { -60, -18, 18,  60 }, { -80, -24, 24,  80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } 
+   };
+
+   static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
+   static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
+      
+   // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte.
+   static uint16 g_etc1_inverse_lookup[2*8*4][256];      // [diff/inten_table/selector][desired_color]
+
+   // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color.
+   // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8)
+   static const uint16 g_color8_to_etc_block_config_0_255[2][33] =
+   {
+      { 0x0000,  0x0010,  0x0002,  0x0012,  0x0004,  0x0014,  0x0006,  0x0016,  0x0008,  0x0018,  0x000A,  0x001A,  0x000C,  0x001C,  0x000E,  0x001E,
+        0x0001,  0x0011,  0x0003,  0x0013,  0x0005,  0x0015,  0x0007,  0x0017,  0x0009,  0x0019,  0x000B,  0x001B,  0x000D,  0x001D,  0x000F,  0x001F, 0xFFFF },
+      { 0x0F20,  0x0F30,  0x0E32,  0x0F22,  0x0E34,  0x0F24,  0x0D36,  0x0F26,  0x0C38,  0x0E28,  0x0B3A,  0x0E2A,  0x093C,  0x0E2C,  0x053E,  0x0D2E,
+        0x1E31,  0x1F21,  0x1D33,  0x1F23,  0x1C35,  0x1E25,  0x1A37,  0x1E27,  0x1839,  0x1D29,  0x163B,  0x1C2B,  0x133D,  0x1B2D,  0x093F,  0x1A2F, 0xFFFF },
+   };
+
+   // Really only [254][11].
+   static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = 
+   {
+      { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E,
+      0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, {
+      0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306,
+      0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112,
+      0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707,
+      0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B,
+      0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605,
+      0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF
+      }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214,
+      0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A,
+      0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, {
+      0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B,
+      0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D,
+      0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805,
+      0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F,
+      0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, {
+      0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521,
+      0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523,
+      0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F,
+      0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B,
+      0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, {
+      0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F,
+      0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D,
+      0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529,
+      0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917,
+      0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E,
+      0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725,
+      0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139,
+      0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, {
+      0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A,
+      0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437,
+      0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500,
+      0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, {
+      0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19,
+      0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D,
+      0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, {
+      0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F,
+      0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D,
+      0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, {
+	      0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05,
+      0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434,
+      0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01,
+      0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21,
+      0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27,
+      0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E,
+      0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D,
+      0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, {
+      0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, {
+      0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307,
+      0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33,
+      0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B,
+      0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, {
+      0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103,
+      0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B,
+      0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536,
+      0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A,
+      0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115,
+      0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, {
+      0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF
+      }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820,
+      0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031,
+      0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, {
+      0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35,
+      0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F,
+      0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D,
+      0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029,
+      0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832,
+      0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D,
+      0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133,
+      0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF
+      }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, {
+      0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331,
+      0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D,
+      0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513,
+      0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF
+      }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, {
+      0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, {
+      0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905,
+      0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09,
+      0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D,
+      0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621,
+      0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18,
+      0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919,
+      0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625,
+      0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F,
+      0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936,
+      0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A,
+      0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, {
+      0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913,
+      0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, {
+      0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20,
+      0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C,
+      0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, {
+      0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06,
+      0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, {
+      0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26,
+      0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18,
+      0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03,
+      0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929,
+      0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23,
+      0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF
+      }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B,
+      0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E,
+      0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18,
+      0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01,
+      0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16,
+      0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B,
+      0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01,
+      0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34,
+      0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11,
+      0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF },
+   };
+
+   struct etc1_block
+   {
+      // big endian uint64:
+      // bit ofs:  56  48  40  32  24  16   8   0
+      // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 
+      union 
+      {
+         uint64 m_uint64;
+         uint8 m_bytes[8];
+      };
+
+      uint8 m_low_color[2];
+      uint8 m_high_color[2];
+
+      enum { cNumSelectorBytes = 4 };
+      uint8 m_selectors[cNumSelectorBytes];
+
+      inline void clear()
+      {
+         zero_this(this);
+      }
+
+      inline uint get_byte_bits(uint ofs, uint num) const
+      {
+         RG_ETC1_ASSERT((ofs + num) <= 64U);
+         RG_ETC1_ASSERT(num && (num <= 8U));
+         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
+         const uint byte_ofs = 7 - (ofs >> 3);
+         const uint byte_bit_ofs = ofs & 7;
+         return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
+      }
+
+      inline void set_byte_bits(uint ofs, uint num, uint bits)
+      {
+         RG_ETC1_ASSERT((ofs + num) <= 64U);
+         RG_ETC1_ASSERT(num && (num < 32U));
+         RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
+         RG_ETC1_ASSERT(bits < (1U << num));
+         const uint byte_ofs = 7 - (ofs >> 3);
+         const uint byte_bit_ofs = ofs & 7;
+         const uint mask = (1 << num) - 1;
+         m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
+         m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
+      }
+
+      // false = left/right subblocks
+      // true = upper/lower subblocks
+      inline bool get_flip_bit() const 
+      {
+         return (m_bytes[3] & 1) != 0;
+      }   
+
+      inline void set_flip_bit(bool flip)
+      {
+         m_bytes[3] &= ~1;
+         m_bytes[3] |= static_cast<uint8>(flip);
+      }
+
+      inline bool get_diff_bit() const
+      {
+         return (m_bytes[3] & 2) != 0;
+      }
+
+      inline void set_diff_bit(bool diff)
+      {
+         m_bytes[3] &= ~2;
+         m_bytes[3] |= (static_cast<uint>(diff) << 1);
+      }
+
+      // Returns intensity modifier table (0-7) used by subblock subblock_id.
+      // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2)
+      inline uint get_inten_table(uint subblock_id) const
+      {
+         RG_ETC1_ASSERT(subblock_id < 2);
+         const uint ofs = subblock_id ? 2 : 5;
+         return (m_bytes[3] >> ofs) & 7;
+      }
+
+      // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
+      inline void set_inten_table(uint subblock_id, uint t)
+      {
+         RG_ETC1_ASSERT(subblock_id < 2);
+         RG_ETC1_ASSERT(t < 8);
+         const uint ofs = subblock_id ? 2 : 5;
+         m_bytes[3] &= ~(7 << ofs);
+         m_bytes[3] |= (t << ofs);
+      }
+
+      // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
+      inline uint get_selector(uint x, uint y) const
+      {
+         RG_ETC1_ASSERT((x | y) < 4);
+
+         const uint bit_index = x * 4 + y;
+         const uint byte_bit_ofs = bit_index & 7;
+         const uint8 *p = &m_bytes[7 - (bit_index >> 3)];
+         const uint lsb = (p[0] >> byte_bit_ofs) & 1;
+         const uint msb = (p[-2] >> byte_bit_ofs) & 1;
+         const uint val = lsb | (msb << 1);
+
+         return g_etc1_to_selector_index[val];
+      }
+
+      // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
+      inline void set_selector(uint x, uint y, uint val)
+      {
+         RG_ETC1_ASSERT((x | y | val) < 4);
+         const uint bit_index = x * 4 + y;
+
+         uint8 *p = &m_bytes[7 - (bit_index >> 3)];
+
+         const uint byte_bit_ofs = bit_index & 7;
+         const uint mask = 1 << byte_bit_ofs;
+
+         const uint etc1_val = g_selector_index_to_etc1[val];
+
+         const uint lsb = etc1_val & 1;
+         const uint msb = etc1_val >> 1;
+
+         p[0] &= ~mask;
+         p[0] |= (lsb << byte_bit_ofs);
+
+         p[-2] &= ~mask;
+         p[-2] |= (msb << byte_bit_ofs);
+      }
+
+      inline void set_base4_color(uint idx, uint16 c)
+      {
+         if (idx)
+         {
+            set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
+            set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
+            set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
+         }
+         else
+         {
+            set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
+            set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
+            set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
+         }
+      }
+
+      inline uint16 get_base4_color(uint idx) const
+      {
+         uint r, g, b;
+         if (idx)
+         {
+            r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
+            g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
+            b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
+         }
+         else
+         {
+            r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
+            g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
+            b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
+         }
+         return static_cast<uint16>(b | (g << 4U) | (r << 8U));
+      }
+
+      inline void set_base5_color(uint16 c)
+      {
+         set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
+         set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
+         set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
+      }
+
+      inline uint16 get_base5_color() const
+      {
+         const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
+         const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
+         const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
+         return static_cast<uint16>(b | (g << 5U) | (r << 10U));
+      }
+
+      void set_delta3_color(uint16 c)
+      {
+         set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
+         set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
+         set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
+      }
+
+      inline uint16 get_delta3_color() const
+      {
+         const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
+         const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
+         const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
+         return static_cast<uint16>(b | (g << 3U) | (r << 6U));
+      }
+
+      // Base color 5
+      static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
+      static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);
+
+      static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
+      static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled);
+
+      static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
+      static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
+
+      // Delta color 3
+      // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
+      static uint16 pack_delta3(int r, int g, int b);
+
+      // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
+      static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);
+
+      // Abs color 4
+      static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
+      static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);
+
+      static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
+      static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);
+
+      // subblock colors
+      static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
+      static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
+      static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);
+
+      static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4)
+      {
+         if (color4)
+         {
+            dst.r = src.r | (src.r << 4);
+            dst.g = src.g | (src.g << 4);
+            dst.b = src.b | (src.b << 4);
+         }
+         else
+         {
+            dst.r = (src.r >> 2) | (src.r << 3);
+            dst.g = (src.g >> 2) | (src.g << 3);
+            dst.b = (src.b >> 2) | (src.b << 3);
+         }
+         dst.a = src.a;
+      }
+   };
+
+   // Returns pointer to sorted array.
+   template<typename T, typename Q>
+   T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices)
+   {  
+      RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T)));
+      RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4));
+
+      if (init_indices)
+      {
+         T* p = pIndices0;
+         T* q = pIndices0 + (num_indices >> 1) * 2;
+         uint i;
+         for (i = 0; p != q; p += 2, i += 2)
+         {
+            p[0] = static_cast<T>(i);
+            p[1] = static_cast<T>(i + 1); 
+         }
+
+         if (num_indices & 1)
+            *p = static_cast<T>(i);
+      }
+
+      uint hist[256 * 4];
+
+      memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
+
+#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs))
+#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs))
+
+      if (key_size == 4)
+      {
+         T* p = pIndices0;
+         T* q = pIndices0 + num_indices;
+         for ( ; p != q; p++)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[        key        & 0xFF]++;
+            hist[256 + ((key >>  8) & 0xFF)]++;
+            hist[512 + ((key >> 16) & 0xFF)]++;
+            hist[768 + ((key >> 24) & 0xFF)]++;
+         }
+      }
+      else if (key_size == 3)
+      {
+         T* p = pIndices0;
+         T* q = pIndices0 + num_indices;
+         for ( ; p != q; p++)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[        key        & 0xFF]++;
+            hist[256 + ((key >>  8) & 0xFF)]++;
+            hist[512 + ((key >> 16) & 0xFF)]++;
+         }
+      }   
+      else if (key_size == 2)
+      {
+         T* p = pIndices0;
+         T* q = pIndices0 + (num_indices >> 1) * 2;
+
+         for ( ; p != q; p += 2)
+         {
+            const uint key0 = RG_ETC1_GET_KEY(p);
+            const uint key1 = RG_ETC1_GET_KEY(p+1);
+
+            hist[        key0         & 0xFF]++;
+            hist[256 + ((key0 >>  8) & 0xFF)]++;
+
+            hist[        key1        & 0xFF]++;
+            hist[256 + ((key1 >>  8) & 0xFF)]++;
+         }
+
+         if (num_indices & 1)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[        key        & 0xFF]++;
+            hist[256 + ((key >>  8) & 0xFF)]++;
+         }
+      }      
+      else
+      {
+         RG_ETC1_ASSERT(key_size == 1);
+         if (key_size != 1)
+            return NULL;
+
+         T* p = pIndices0;
+         T* q = pIndices0 + (num_indices >> 1) * 2;
+
+         for ( ; p != q; p += 2)
+         {
+            const uint key0 = RG_ETC1_GET_KEY(p);
+            const uint key1 = RG_ETC1_GET_KEY(p+1);
+
+            hist[key0 & 0xFF]++;
+            hist[key1 & 0xFF]++;
+         }
+
+         if (num_indices & 1)
+         {
+            const uint key = RG_ETC1_GET_KEY(p);
+
+            hist[key & 0xFF]++;
+         }
+      }      
+
+      T* pCur = pIndices0;
+      T* pNew = pIndices1;
+
+      for (uint pass = 0; pass < key_size; pass++)
+      {
+         const uint* pHist = &hist[pass << 8];
+
+         uint offsets[256];
+
+         uint cur_ofs = 0;
+         for (uint i = 0; i < 256; i += 2)
+         {
+            offsets[i] = cur_ofs;
+            cur_ofs += pHist[i];
+
+            offsets[i+1] = cur_ofs;
+            cur_ofs += pHist[i+1];
+         }
+
+         const uint pass_shift = pass << 3;
+
+         T* p = pCur;
+         T* q = pCur + (num_indices >> 1) * 2;
+
+         for ( ; p != q; p += 2)
+         {
+            uint index0 = p[0];
+            uint index1 = p[1];
+
+            uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF;
+            uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF;
+
+            if (c0 == c1)
+            {
+               uint dst_offset0 = offsets[c0];
+
+               offsets[c0] = dst_offset0 + 2;
+
+               pNew[dst_offset0] = static_cast<T>(index0);
+               pNew[dst_offset0 + 1] = static_cast<T>(index1);
+            }
+            else
+            {
+               uint dst_offset0 = offsets[c0]++;
+               uint dst_offset1 = offsets[c1]++;
+
+               pNew[dst_offset0] = static_cast<T>(index0);
+               pNew[dst_offset1] = static_cast<T>(index1);
+            }
+         }
+
+         if (num_indices & 1)
+         {
+            uint index = *p;
+            uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF;
+
+            uint dst_offset = offsets[c];
+            offsets[c] = dst_offset + 1;
+
+            pNew[dst_offset] = static_cast<T>(index);
+         }
+
+         T* t = pCur;
+         pCur = pNew;
+         pNew = t;
+      }            
+
+      return pCur;
+   }
+
+#undef RG_ETC1_GET_KEY
+#undef RG_ETC1_GET_KEY_FROM_INDEX
+
+   uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias)
+   {
+      return pack_color5(color.r, color.g, color.b, scaled, bias);
+   }
+   
+   uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias)
+   {
+      if (scaled)
+      {
+         r = (r * 31U + bias) / 255U;
+         g = (g * 31U + bias) / 255U;
+         b = (b * 31U + bias) / 255U;
+      }
+
+      r = rg_etc1::minimum(r, 31U);
+      g = rg_etc1::minimum(g, 31U);
+      b = rg_etc1::minimum(b, 31U);
+
+      return static_cast<uint16>(b | (g << 5U) | (r << 10U));
+   }
+
+   color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha)
+   {
+      uint b = packed_color5 & 31U;
+      uint g = (packed_color5 >> 5U) & 31U;
+      uint r = (packed_color5 >> 10U) & 31U;
+
+      if (scaled)
+      {
+         b = (b << 3U) | (b >> 2U);
+         g = (g << 3U) | (g >> 2U);
+         r = (r << 3U) | (r >> 2U);
+      }
+
+      return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U));
+   }
+
+   void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled)
+   {
+      color_quad_u8 c(unpack_color5(packed_color5, scaled, 0));
+      r = c.r;
+      g = c.g;
+      b = c.b;
+   }
+
+   bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
+   {
+      int dc_r, dc_g, dc_b;
+      unpack_delta3(dc_r, dc_g, dc_b, packed_delta3);
+      
+      int b = (packed_color5 & 31U) + dc_b;
+      int g = ((packed_color5 >> 5U) & 31U) + dc_g;
+      int r = ((packed_color5 >> 10U) & 31U) + dc_r;
+
+      bool success = true;
+      if (static_cast<uint>(r | g | b) > 31U)
+      {
+         success = false;
+         r = rg_etc1::clamp<int>(r, 0, 31);
+         g = rg_etc1::clamp<int>(g, 0, 31);
+         b = rg_etc1::clamp<int>(b, 0, 31);
+      }
+
+      if (scaled)
+      {
+         b = (b << 3U) | (b >> 2U);
+         g = (g << 3U) | (g >> 2U);
+         r = (r << 3U) | (r >> 2U);
+      }
+
+      result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U));
+      return success;
+   }
+
+   bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
+   {
+      color_quad_u8 result;
+      const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha);
+      r = result.r;
+      g = result.g;
+      b = result.b;
+      return success;
+   }
+     
+   uint16 etc1_block::pack_delta3(int r, int g, int b)
+   {
+      RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
+      RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
+      RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
+      if (r < 0) r += 8;
+      if (g < 0) g += 8;
+      if (b < 0) b += 8;
+      return static_cast<uint16>(b | (g << 3) | (r << 6));
+   }
+   
+   void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3)
+   {
+      r = (packed_delta3 >> 6) & 7;
+      g = (packed_delta3 >> 3) & 7;
+      b = packed_delta3 & 7;
+      if (r >= 4) r -= 8;
+      if (g >= 4) g -= 8;
+      if (b >= 4) b -= 8;
+   }
+
+   uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias)
+   {
+      return pack_color4(color.r, color.g, color.b, scaled, bias);
+   }
+   
+   uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias)
+   {
+      if (scaled)
+      {
+         r = (r * 15U + bias) / 255U;
+         g = (g * 15U + bias) / 255U;
+         b = (b * 15U + bias) / 255U;
+      }
+
+      r = rg_etc1::minimum(r, 15U);
+      g = rg_etc1::minimum(g, 15U);
+      b = rg_etc1::minimum(b, 15U);
+
+      return static_cast<uint16>(b | (g << 4U) | (r << 8U));
+   }
+
+   color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha)
+   {
+      uint b = packed_color4 & 15U;
+      uint g = (packed_color4 >> 4U) & 15U;
+      uint r = (packed_color4 >> 8U) & 15U;
+
+      if (scaled)
+      {
+         b = (b << 4U) | b;
+         g = (g << 4U) | g;
+         r = (r << 4U) | r;
+      }
+
+      return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U));
+   }
+   
+   void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled)
+   {
+      color_quad_u8 c(unpack_color4(packed_color4, scaled, 0));
+      r = c.r;
+      g = c.g;
+      b = c.b;
+   }
+
+   void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx)
+   {
+      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+      const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
+
+      uint r, g, b;
+      unpack_color5(r, g, b, packed_color5, true);
+
+      const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
+
+      const int y0 = pInten_modifer_table[0];
+      pDst[0].set(ir + y0, ig + y0, ib + y0);
+
+      const int y1 = pInten_modifer_table[1];
+      pDst[1].set(ir + y1, ig + y1, ib + y1);
+
+      const int y2 = pInten_modifer_table[2];
+      pDst[2].set(ir + y2, ig + y2, ib + y2);
+
+      const int y3 = pInten_modifer_table[3];
+      pDst[3].set(ir + y3, ig + y3, ib + y3);
+   }
+   
+   bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx)
+   {
+      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+      const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
+
+      uint r, g, b;
+      bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true);
+
+      const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
+
+      const int y0 = pInten_modifer_table[0];
+      pDst[0].set(ir + y0, ig + y0, ib + y0);
+
+      const int y1 = pInten_modifer_table[1];
+      pDst[1].set(ir + y1, ig + y1, ib + y1);
+
+      const int y2 = pInten_modifer_table[2];
+      pDst[2].set(ir + y2, ig + y2, ib + y2);
+
+      const int y3 = pInten_modifer_table[3];
+      pDst[3].set(ir + y3, ig + y3, ib + y3);
+
+      return success;
+   }
+   
+   void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx)
+   {
+      RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
+      const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
+
+      uint r, g, b;
+      unpack_color4(r, g, b, packed_color4, true);
+      
+      const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
+
+      const int y0 = pInten_modifer_table[0];
+      pDst[0].set(ir + y0, ig + y0, ib + y0);
+      
+      const int y1 = pInten_modifer_table[1];
+      pDst[1].set(ir + y1, ig + y1, ib + y1);
+
+      const int y2 = pInten_modifer_table[2];
+      pDst[2].set(ir + y2, ig + y2, ib + y2);
+
+      const int y3 = pInten_modifer_table[3];
+      pDst[3].set(ir + y3, ig + y3, ib + y3);
+   }
+      
+   bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha)
+   {
+      color_quad_u8* pDst = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba);
+      const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block);
+
+      const bool diff_flag = block.get_diff_bit();
+      const bool flip_flag = block.get_flip_bit();
+      const uint table_index0 = block.get_inten_table(0);
+      const uint table_index1 = block.get_inten_table(1);
+
+      color_quad_u8 subblock_colors0[4];
+      color_quad_u8 subblock_colors1[4];
+      bool success = true;
+
+      if (diff_flag)
+      {
+         const uint16 base_color5 = block.get_base5_color();
+         const uint16 delta_color3 = block.get_delta3_color();
+         etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0);
+            
+         if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1))
+            success = false;
+      }
+      else
+      {
+         const uint16 base_color4_0 = block.get_base4_color(0);
+         etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0);
+
+         const uint16 base_color4_1 = block.get_base4_color(1);
+         etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1);
+      }
+
+      if (preserve_alpha)
+      {
+         if (flip_flag)
+         {
+            for (uint y = 0; y < 2; y++)
+            {
+               pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
+               pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
+               pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]);
+               pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]);
+               pDst += 4;
+            }
+
+            for (uint y = 2; y < 4; y++)
+            {
+               pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]);
+               pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]);
+               pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
+               pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
+               pDst += 4;
+            }
+         }
+         else
+         {
+            for (uint y = 0; y < 4; y++)
+            {
+               pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
+               pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
+               pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
+               pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
+               pDst += 4;
+            }
+         }
+      }
+      else 
+      {
+         if (flip_flag)
+         {
+            // 0000
+            // 0000
+            // 1111
+            // 1111
+            for (uint y = 0; y < 2; y++)
+            {
+               pDst[0] = subblock_colors0[block.get_selector(0, y)];
+               pDst[1] = subblock_colors0[block.get_selector(1, y)];
+               pDst[2] = subblock_colors0[block.get_selector(2, y)];
+               pDst[3] = subblock_colors0[block.get_selector(3, y)];
+               pDst += 4;
+            }
+
+            for (uint y = 2; y < 4; y++)
+            {
+               pDst[0] = subblock_colors1[block.get_selector(0, y)];
+               pDst[1] = subblock_colors1[block.get_selector(1, y)];
+               pDst[2] = subblock_colors1[block.get_selector(2, y)];
+               pDst[3] = subblock_colors1[block.get_selector(3, y)];
+               pDst += 4;
+            }
+         }
+         else
+         {
+            // 0011
+            // 0011
+            // 0011
+            // 0011
+            for (uint y = 0; y < 4; y++)
+            {
+               pDst[0] = subblock_colors0[block.get_selector(0, y)];
+               pDst[1] = subblock_colors0[block.get_selector(1, y)];
+               pDst[2] = subblock_colors1[block.get_selector(2, y)];
+               pDst[3] = subblock_colors1[block.get_selector(3, y)];
+               pDst += 4;
+            }
+         }
+      }
+      
+      return success;
+   }
+
+   struct etc1_solution_coordinates
+   {
+      inline etc1_solution_coordinates() :
+      m_unscaled_color(0, 0, 0, 0),
+         m_inten_table(0),
+         m_color4(false)
+      {
+      }
+
+      inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : 
+      m_unscaled_color(r, g, b, 255),
+         m_inten_table(inten_table),
+         m_color4(color4)
+      {
+      }
+
+      inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : 
+      m_unscaled_color(c),
+         m_inten_table(inten_table),
+         m_color4(color4)
+      {
+      }
+
+      inline etc1_solution_coordinates(const etc1_solution_coordinates& other)
+      {
+         *this = other;
+      }
+
+      inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs)
+      {
+         m_unscaled_color = rhs.m_unscaled_color;
+         m_inten_table = rhs.m_inten_table;
+         m_color4 = rhs.m_color4;
+         return *this;
+      }
+
+      inline void clear()
+      {
+         m_unscaled_color.clear();
+         m_inten_table = 0;
+         m_color4 = false;
+      }
+
+      inline color_quad_u8 get_scaled_color() const
+      {
+         int br, bg, bb;
+         if (m_color4)
+         {
+            br = m_unscaled_color.r | (m_unscaled_color.r << 4);
+            bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
+            bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
+         }
+         else
+         {
+            br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
+            bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
+            bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
+         }
+         return color_quad_u8(br, bg, bb);
+      }
+
+      inline void get_block_colors(color_quad_u8* pBlock_colors)
+      {
+         int br, bg, bb;
+         if (m_color4)
+         {
+            br = m_unscaled_color.r | (m_unscaled_color.r << 4);
+            bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
+            bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
+         }
+         else
+         {
+            br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
+            bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
+            bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
+         }
+         const int* pInten_table = g_etc1_inten_tables[m_inten_table];
+         pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]);
+         pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]);
+         pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]);
+         pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]);
+      }
+
+      color_quad_u8 m_unscaled_color;
+      uint m_inten_table;
+      bool m_color4;
+   };
+
+   class etc1_optimizer
+   {
+      etc1_optimizer(const etc1_optimizer&);
+      etc1_optimizer& operator= (const etc1_optimizer&);
+
+   public:
+      etc1_optimizer()
+      {
+         clear();
+      }
+
+      void clear()
+      {
+         m_pParams = NULL;
+         m_pResult = NULL;
+         m_pSorted_luma = NULL;
+         m_pSorted_luma_indices = NULL;
+      }
+
+      struct params : etc1_pack_params
+      {
+         params()
+         {
+            clear();
+         }
+
+         params(const etc1_pack_params& base_params) : 
+         etc1_pack_params(base_params)
+         {
+            clear_optimizer_params();
+         }
+
+         void clear()
+         {
+            etc1_pack_params::clear();
+            clear_optimizer_params();
+         }
+
+         void clear_optimizer_params()
+         {
+            m_num_src_pixels = 0;
+            m_pSrc_pixels = 0;
+
+            m_use_color4 = false;
+            static const int s_default_scan_delta[] = { 0 };
+            m_pScan_deltas = s_default_scan_delta;
+            m_scan_delta_size = 1;
+
+            m_base_color5.clear();
+            m_constrain_against_base_color5 = false;
+         }
+
+         uint m_num_src_pixels;
+         const color_quad_u8* m_pSrc_pixels;
+
+         bool m_use_color4;
+         const int* m_pScan_deltas;
+         uint m_scan_delta_size;
+
+         color_quad_u8 m_base_color5;
+         bool m_constrain_against_base_color5;
+      };
+
+      struct results
+      {
+         uint64 m_error;
+         color_quad_u8 m_block_color_unscaled;
+         uint m_block_inten_table;
+         uint m_n;
+         uint8* m_pSelectors;
+         bool m_block_color4;
+
+         inline results& operator= (const results& rhs)
+         {
+            m_block_color_unscaled = rhs.m_block_color_unscaled;
+            m_block_color4 = rhs.m_block_color4;
+            m_block_inten_table = rhs.m_block_inten_table;
+            m_error = rhs.m_error;
+            RG_ETC1_ASSERT(m_n == rhs.m_n);
+            memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n);
+            return *this;
+         }
+      };
+
+      void init(const params& params, results& result);
+      bool compute();
+
+   private:      
+      struct potential_solution
+      {
+         potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false)
+         {
+         }
+
+         etc1_solution_coordinates  m_coords;
+         uint8                      m_selectors[8];
+         uint64                     m_error;
+         bool                       m_valid;
+
+         void clear()
+         {
+            m_coords.clear();
+            m_error = cUINT64_MAX;
+            m_valid = false;
+         }
+      };
+
+      const params* m_pParams;
+      results* m_pResult;
+
+      int m_limit;
+
+      vec3F m_avg_color;
+      int m_br, m_bg, m_bb;
+      uint16 m_luma[8];
+      uint32 m_sorted_luma[2][8];
+      const uint32* m_pSorted_luma_indices;
+      uint32* m_pSorted_luma;
+
+      uint8 m_selectors[8];
+      uint8 m_best_selectors[8];
+
+      potential_solution m_best_solution;
+      potential_solution m_trial_solution;
+      uint8 m_temp_selectors[8];
+
+      bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
+      bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
+   };
+      
+   bool etc1_optimizer::compute()
+   {
+      const uint n = m_pParams->m_num_src_pixels;
+      const int scan_delta_size = m_pParams->m_scan_delta_size;
+      
+      // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color.
+      // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index.
+      for (int zdi = 0; zdi < scan_delta_size; zdi++)
+      {
+         const int zd = m_pParams->m_pScan_deltas[zdi];
+         const int mbb = m_bb + zd;
+         if (mbb < 0) continue; else if (mbb > m_limit) break;
+         
+         for (int ydi = 0; ydi < scan_delta_size; ydi++)
+         {
+            const int yd = m_pParams->m_pScan_deltas[ydi];
+            const int mbg = m_bg + yd;
+            if (mbg < 0) continue; else if (mbg > m_limit) break;
+
+            for (int xdi = 0; xdi < scan_delta_size; xdi++)
+            {
+               const int xd = m_pParams->m_pScan_deltas[xdi];
+               const int mbr = m_br + xd;
+               if (mbr < 0) continue; else if (mbr > m_limit) break;
+      
+               etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4);
+               if (m_pParams->m_quality == cHighQuality)
+               {
+                  if (!evaluate_solution(coords, m_trial_solution, &m_best_solution))
+                     continue;
+               }
+               else
+               {
+                  if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution))
+                     continue;
+               }
+               
+               // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index.
+               // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors:
+               // The goal is:
+               // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0
+               // Rearranging this:
+               // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0
+               // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0
+               // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4
+               // So what this means:
+               // optimal_block_color = avg_input - avg_inten_delta
+               // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta.
+               // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula.
+               // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping.
+
+               const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2);
+               for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++)
+               {
+                  const uint8* pSelectors = m_best_solution.m_selectors;
+                  const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table];
+
+                  int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0;
+                  const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color());
+                  for (uint r = 0; r < n; r++)
+                  {
+                     const uint s = *pSelectors++;
+                     const int yd = pInten_table[s];
+                     // Compute actual delta being applied to each pixel, taking into account clamping.
+                     delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r;
+                     delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g;
+                     delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b;
+                  }
+                  if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b))
+                     break;
+                  const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n;
+                  const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n;
+                  const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n;
+                  const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit);
+                  const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit);
+                  const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit);
+                  
+                  bool skip = false;
+                  
+                  if ((mbr == br1) && (mbg == bg1) && (mbb == bb1))
+                     skip = true;
+                  else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b))
+                     skip = true;
+                  else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1))
+                     skip = true;
+
+                  if (skip)
+                     break;
+
+                  etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4);
+                  if (m_pParams->m_quality == cHighQuality)
+                  {
+                     if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) 
+                        break;
+                  }
+                  else
+                  {
+                     if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution))
+                        break;
+                  }
+
+               }  // refinement_trial
+
+            } // xdi
+         } // ydi
+      } // zdi
+
+      if (!m_best_solution.m_valid)
+      {
+         m_pResult->m_error = cUINT32_MAX;
+         return false;
+      }
+      
+      const uint8* pSelectors = m_best_solution.m_selectors;
+
+#ifdef RG_ETC1_BUILD_DEBUG
+      {
+         color_quad_u8 block_colors[4];
+         m_best_solution.m_coords.get_block_colors(block_colors);
+
+         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
+         uint64 actual_error = 0;
+         for (uint i = 0; i < n; i++)
+            actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]);
+         
+         RG_ETC1_ASSERT(actual_error == m_best_solution.m_error);
+      }
+#endif      
+      
+      m_pResult->m_error = m_best_solution.m_error;
+
+      m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color;
+      m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4;
+      
+      m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table;
+      memcpy(m_pResult->m_pSelectors, pSelectors, n);
+      m_pResult->m_n = n;
+
+      return true;
+   }
+
+   void etc1_optimizer::init(const params& p, results& r)
+   {
+      // This version is hardcoded for 8 pixel subblocks.
+      RG_ETC1_ASSERT(p.m_num_src_pixels == 8);
+      
+      m_pParams = &p;
+      m_pResult = &r;
+                  
+      const uint n = 8;
+      
+      m_limit = m_pParams->m_use_color4 ? 15 : 31;
+
+      vec3F avg_color(0.0f);
+
+      for (uint i = 0; i < n; i++)
+      {
+         const color_quad_u8& c = m_pParams->m_pSrc_pixels[i];
+         const vec3F fc(c.r, c.g, c.b);
+
+         avg_color += fc;
+
+         m_luma[i] = static_cast<uint16>(c.r + c.g + c.b);
+         m_sorted_luma[0][i] = i;
+      }
+      avg_color *= (1.0f / static_cast<float>(n));
+      m_avg_color = avg_color;
+
+      m_br = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit);
+      m_bg = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit);
+      m_bb = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit);
+
+      if (m_pParams->m_quality <= cMediumQuality)
+      {
+         m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false);
+         m_pSorted_luma = m_sorted_luma[0];
+         if (m_pSorted_luma_indices == m_sorted_luma[0])
+            m_pSorted_luma = m_sorted_luma[1];
+      
+         for (uint i = 0; i < n; i++)
+            m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]];
+      }
+      
+      m_best_solution.m_coords.clear();
+      m_best_solution.m_valid = false;
+      m_best_solution.m_error = cUINT64_MAX;
+   }
+
+   bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
+   {
+      trial_solution.m_valid = false;
+
+      if (m_pParams->m_constrain_against_base_color5)
+      {
+         const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
+         const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
+         const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
+
+         if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
+            return false;
+      }
+
+      const color_quad_u8 base_color(coords.get_scaled_color());
+      
+      const uint n = 8;
+            
+      trial_solution.m_error = cUINT64_MAX;
+            
+      for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++)
+      {
+         const int* pInten_table = g_etc1_inten_tables[inten_table];
+
+         color_quad_u8 block_colors[4];
+         for (uint s = 0; s < 4; s++)
+         {
+            const int yd = pInten_table[s];
+            block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
+         }
+         
+         uint64 total_error = 0;
+         
+         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
+         for (uint c = 0; c < n; c++)
+         {
+            const color_quad_u8& src_pixel = *pSrc_pixels++;
+            
+            uint best_selector_index = 0;
+            uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b);
+
+            uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b);
+            if (trial_error < best_error)
+            {
+               best_error = trial_error;
+               best_selector_index = 1;
+            }
+
+            trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b);
+            if (trial_error < best_error)
+            {
+               best_error = trial_error;
+               best_selector_index = 2;
+            }
+
+            trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b);
+            if (trial_error < best_error)
+            {
+               best_error = trial_error;
+               best_selector_index = 3;
+            }
+
+            m_temp_selectors[c] = static_cast<uint8>(best_selector_index);
+
+            total_error += best_error;
+            if (total_error >= trial_solution.m_error)
+               break;
+         }
+         
+         if (total_error < trial_solution.m_error)
+         {
+            trial_solution.m_error = total_error;
+            trial_solution.m_coords.m_inten_table = inten_table;
+            memcpy(trial_solution.m_selectors, m_temp_selectors, 8);
+            trial_solution.m_valid = true;
+         }
+      }
+      trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
+      trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
+
+      bool success = false;
+      if (pBest_solution)
+      {
+         if (trial_solution.m_error < pBest_solution->m_error)
+         {
+            *pBest_solution = trial_solution;
+            success = true;
+         }
+      }
+
+      return success;
+   }
+
+   bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
+   {
+      if (m_pParams->m_constrain_against_base_color5)
+      {
+         const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
+         const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
+         const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
+
+         if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
+         {
+            trial_solution.m_valid = false;
+            return false;
+         }
+      }
+
+      const color_quad_u8 base_color(coords.get_scaled_color());
+
+      const uint n = 8;
+      
+      trial_solution.m_error = cUINT64_MAX;
+
+      for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table)
+      {
+         const int* pInten_table = g_etc1_inten_tables[inten_table];
+
+         uint block_inten[4];
+         color_quad_u8 block_colors[4];
+         for (uint s = 0; s < 4; s++)
+         {
+            const int yd = pInten_table[s];
+            color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
+            block_colors[s] = block_color;
+            block_inten[s] = block_color.r + block_color.g + block_color.b;
+         }
+
+         // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors.
+         // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast.
+         // 0   1   2   3
+         //   01  12  23
+         const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] };
+
+         uint64 total_error = 0;
+         const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
+         if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0])
+         {
+            if (block_inten[0] > m_pSorted_luma[n - 1])
+            {
+           const uint min_error = intabs(block_inten[0] - m_pSorted_luma[n - 1]);
+               if (min_error >= trial_solution.m_error)
+                  continue;
+            }
+
+            memset(&m_temp_selectors[0], 0, n);
+
+            for (uint c = 0; c < n; c++)
+               total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]);
+         }
+         else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2])
+         {
+            if (m_pSorted_luma[0] > block_inten[3])
+            {
+           const uint min_error = intabs(m_pSorted_luma[0] - block_inten[3]);
+               if (min_error >= trial_solution.m_error)
+                  continue;
+            }
+
+            memset(&m_temp_selectors[0], 3, n);
+
+            for (uint c = 0; c < n; c++)
+               total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]);
+         }
+         else
+         {
+            uint cur_selector = 0, c;
+            for (c = 0; c < n; c++)
+            {
+               const uint y = m_pSorted_luma[c];
+               while ((y * 2) >= block_inten_midpoints[cur_selector])
+                  if (++cur_selector > 2)
+                     goto done;
+               const uint sorted_pixel_index = m_pSorted_luma_indices[c];
+               m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector);
+               total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
+            }
+done:
+            while (c < n)
+            {
+               const uint sorted_pixel_index = m_pSorted_luma_indices[c];
+               m_temp_selectors[sorted_pixel_index] = 3;
+               total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
+               ++c;
+            }
+         }
+
+         if (total_error < trial_solution.m_error)
+         {
+            trial_solution.m_error = total_error;
+            trial_solution.m_coords.m_inten_table = inten_table;
+            memcpy(trial_solution.m_selectors, m_temp_selectors, n);
+            trial_solution.m_valid = true;
+            if (!total_error)
+               break;
+         }
+      }
+      trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
+      trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
+      
+      bool success = false;
+      if (pBest_solution)
+      {
+         if (trial_solution.m_error < pBest_solution->m_error)
+         {
+            *pBest_solution = trial_solution;
+            success = true;
+         }
+      }
+
+      return success;
+   }
+         
+   static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c)
+   {
+      const uint limit = diff ? 32 : 16; limit;
+      RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit));
+      int c;
+      if (diff)
+         c = (packed_c >> 2) | (packed_c << 3);
+      else 
+         c = packed_c | (packed_c << 4);
+      c += g_etc1_inten_tables[inten][selector];
+      c = rg_etc1::clamp<int>(c, 0, 255);
+      return c;
+   }
+
+   static inline int mul_8bit(int a, int b) { int t = a*b + 128; return (t + (t >> 8)) >> 8; }
+
+   void pack_etc1_block_init()
+   {
+      for (uint diff = 0; diff < 2; diff++)
+      {
+         const uint limit = diff ? 32 : 16;
+
+         for (uint inten = 0; inten < 8; inten++)
+         {
+            for (uint selector = 0; selector < 4; selector++)
+            {
+               const uint inverse_table_index = diff + (inten << 1) + (selector << 4);
+               for (uint color = 0; color < 256; color++)
+               {
+                  uint best_error = cUINT32_MAX, best_packed_c = 0;
+                  for (uint packed_c = 0; packed_c < limit; packed_c++)
+                  {
+                     int v = etc1_decode_value(diff, inten, selector, packed_c);
+                     uint err = labs(v - static_cast<int>(color));
+		     //printf("err: %d - %u = %u\n",v,color,err);
+                     if (err < best_error)
+                     {
+                        best_error = err;
+                        best_packed_c = packed_c;
+                        if (!best_error) 
+                           break;
+                     }
+                  }
+                  RG_ETC1_ASSERT(best_error <= 255);
+                  g_etc1_inverse_lookup[inverse_table_index][color] = static_cast<uint16>(best_packed_c | (best_error << 8));
+               }
+            }
+         }
+      }
+      
+      uint expand5[32];
+      for(int i = 0; i < 32; i++)
+         expand5[i] = (i << 3) | (i >> 2);
+
+      for(int i = 0; i < 256 + 16; i++)
+      {
+         int v = clamp<int>(i - 8, 0, 255);
+         g_quant5_tab[i] = static_cast<uint8>(expand5[mul_8bit(v,31)]);
+      }
+   }
+
+   // Packs solid color blocks efficiently using a set of small precomputed tables.
+   // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
+   static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params)
+   {
+      pack_params;
+      RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
+            
+      static uint s_next_comp[4] = { 1, 2, 0, 1 };
+            
+      uint best_error = cUINT32_MAX, best_i = 0;
+      int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
+
+      // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
+      for (uint i = 0; i < 3; i++)
+      {
+         const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
+
+         const int delta_range = 1;
+         for (int delta = -delta_range; delta <= delta_range; delta++)
+         {
+            const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
+
+            const uint16* pTable;
+            if (!c_plus_delta)
+               pTable = g_color8_to_etc_block_config_0_255[0];
+            else if (c_plus_delta == 255)
+               pTable = g_color8_to_etc_block_config_0_255[1];
+            else
+               pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
+
+            do
+            {
+               const uint x = *pTable++;
+
+#ifdef RG_ETC1_BUILD_DEBUG
+               const uint diff = x & 1;
+               const uint inten = (x >> 1) & 7;
+               const uint selector = (x >> 4) & 3;
+               const uint p0 = (x >> 8) & 255;
+               RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
+#endif
+
+               const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
+               uint16 p1 = pInverse_table[c1];
+               uint16 p2 = pInverse_table[c2];
+               const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
+               if (trial_error < best_error)
+               {
+                  best_error = trial_error;
+                  best_x = x;
+                  best_packed_c1 = p1 & 0xFF;
+                  best_packed_c2 = p2 & 0xFF;
+                  best_i = i;
+                  if (!best_error)
+                     goto found_perfect_match;
+               }
+            } while (*pTable != 0xFFFF);
+         }
+      }
+found_perfect_match:
+
+      const uint diff = best_x & 1;
+      const uint inten = (best_x >> 1) & 7;
+
+      block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1));
+                        
+      const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3];
+      *reinterpret_cast<uint16*>(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0;
+      *reinterpret_cast<uint16*>(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0;
+
+      const uint best_packed_c0 = (best_x >> 8) & 255;
+      if (diff)
+      {
+         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3);
+         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3);
+         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3);
+      }
+      else
+      {
+         block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4));
+         block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4));
+         block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4));
+      }
+
+      return best_error;
+   }
+      
+   static uint pack_etc1_block_solid_color_constrained(
+      etc1_optimizer::results& results, 
+      uint num_colors, const uint8* pColor, 
+      etc1_pack_params& pack_params, 
+      bool use_diff,
+      const color_quad_u8* pBase_color5_unscaled)
+   {
+      RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]);
+
+      pack_params;
+      static uint s_next_comp[4] = { 1, 2, 0, 1 };
+
+      uint best_error = cUINT32_MAX, best_i = 0;
+      int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
+
+      // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
+      for (uint i = 0; i < 3; i++)
+      {
+         const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
+
+         const int delta_range = 1;
+         for (int delta = -delta_range; delta <= delta_range; delta++)
+         {
+            const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
+
+            const uint16* pTable;
+            if (!c_plus_delta)
+               pTable = g_color8_to_etc_block_config_0_255[0];
+            else if (c_plus_delta == 255)
+               pTable = g_color8_to_etc_block_config_0_255[1];
+            else
+               pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
+
+            do
+            {
+               const uint x = *pTable++;
+               const uint diff = x & 1;
+               if (static_cast<uint>(use_diff) != diff)
+               {
+                  if (*pTable == 0xFFFF)
+                     break;
+                  continue;
+               }
+
+               if ((diff) && (pBase_color5_unscaled))
+               {
+                  const int p0 = (x >> 8) & 255;
+                  int delta = p0 - static_cast<int>(pBase_color5_unscaled->c[i]);
+                  if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax))
+                  {
+                     if (*pTable == 0xFFFF)
+                        break;
+                     continue;
+                  }
+               }
+
+#ifdef RG_ETC1_BUILD_DEBUG
+               {
+                  const uint inten = (x >> 1) & 7;
+                  const uint selector = (x >> 4) & 3;
+                  const uint p0 = (x >> 8) & 255;
+                  RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
+               }
+#endif
+
+               const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
+               uint16 p1 = pInverse_table[c1];
+               uint16 p2 = pInverse_table[c2];
+
+               if ((diff) && (pBase_color5_unscaled))
+               {
+                  int delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i]]);
+                  int delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i + 1]]);
+                  if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax))
+                  {
+                     if (*pTable == 0xFFFF)
+                        break;
+                     continue;
+                  }
+               }
+
+               const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
+               if (trial_error < best_error)
+               {
+                  best_error = trial_error;
+                  best_x = x;
+                  best_packed_c1 = p1 & 0xFF;
+                  best_packed_c2 = p2 & 0xFF;
+                  best_i = i;
+                  if (!best_error)
+                     goto found_perfect_match;
+               }
+            } while (*pTable != 0xFFFF);
+         }
+      }
+found_perfect_match:
+
+      if (best_error == cUINT32_MAX)
+         return best_error;
+
+      best_error *= num_colors;
+
+      results.m_n = num_colors;
+      results.m_block_color4 = !(best_x & 1);
+      results.m_block_inten_table = (best_x >> 1) & 7;
+      memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors);
+
+      const uint best_packed_c0 = (best_x >> 8) & 255;
+      results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0);
+      results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1);
+      results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2);
+      results.m_error = best_error;
+      
+      return best_error;
+   }
+
+   // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555.
+   static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block)
+   {
+      int err[8],*ep1 = err,*ep2 = err+4;
+      uint8 *quant = g_quant5_tab+8;
+
+      memset(dest, 0xFF, sizeof(color_quad_u8)*16);
+
+      // process channels seperately
+      for(int ch=0;ch<3;ch++)
+      {
+         uint8* bp = (uint8*)block;
+         uint8* dp = (uint8*)dest;
+
+         bp += ch; dp += ch;
+
+         memset(err,0, sizeof(err));
+         for(int y = 0; y < 4; y++)
+         {
+            // pixel 0
+            dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
+            ep1[0] = bp[ 0] - dp[ 0];
+
+            // pixel 1
+            dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
+            ep1[1] = bp[ 4] - dp[ 4];
+
+            // pixel 2
+            dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
+            ep1[2] = bp[ 8] - dp[ 8];
+
+            // pixel 3
+            dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
+            ep1[3] = bp[12] - dp[12];
+
+            // advance to next line
+            int* tmp = ep1; ep1 = ep2; ep2 = tmp;
+            bp += 16;
+            dp += 16;
+         }
+      }
+   }
+
+   unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params)
+   {
+      const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba);
+      etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block);
+
+#ifdef RG_ETC1_BUILD_DEBUG
+      // Ensure all alpha values are 0xFF.
+      for (uint i = 0; i < 16; i++)
+      {
+         RG_ETC1_ASSERT(pSrc_pixels[i].a == 255);
+      }
+#endif
+
+      color_quad_u8 src_pixel0(pSrc_pixels[0]);
+
+      // Check for solid block.
+      const uint32 first_pixel_u32 = pSrc_pixels->m_u32;
+      int r;
+      for (r = 15; r >= 1; --r)
+         if (pSrc_pixels[r].m_u32 != first_pixel_u32)
+            break;
+      if (!r)
+         return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params));
+      
+      color_quad_u8 dithered_pixels[16];
+      if (pack_params.m_dithering)
+      {
+         dither_block_555(dithered_pixels, pSrc_pixels);
+         pSrc_pixels = dithered_pixels;
+      }
+
+      etc1_optimizer optimizer;
+
+      uint64 best_error = cUINT64_MAX;
+      uint best_flip = false, best_use_color4 = false;
+      
+      uint8 best_selectors[2][8];
+      etc1_optimizer::results best_results[2];
+      for (uint i = 0; i < 2; i++)
+      {
+         best_results[i].m_n = 8;
+         best_results[i].m_pSelectors = best_selectors[i];
+      }
+      
+      uint8 selectors[3][8];
+      etc1_optimizer::results results[3];
+      
+      for (uint i = 0; i < 3; i++)
+      {
+         results[i].m_n = 8;
+         results[i].m_pSelectors = selectors[i];
+      }
+            
+      color_quad_u8 subblock_pixels[8];
+
+      etc1_optimizer::params params(pack_params);
+      params.m_num_src_pixels = 8;
+      params.m_pSrc_pixels = subblock_pixels;
+
+      for (uint flip = 0; flip < 2; flip++)
+      {
+         for (uint use_color4 = 0; use_color4 < 2; use_color4++)
+         {
+            uint64 trial_error = 0;
+
+            uint subblock;
+            for (subblock = 0; subblock < 2; subblock++)
+            {
+               if (flip)
+                  memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8);
+               else
+               {
+                  const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2;
+                  subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12];
+                  subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13];
+               }
+
+               results[2].m_error = cUINT64_MAX;
+               if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4)))
+               {
+                  const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;
+                  for (r = 7; r >= 1; --r)
+                     if (subblock_pixels[r].m_u32 != subblock_pixel0_u32)
+                        break;
+                  if (!r)
+                  {
+                     pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL);
+                  }
+               }
+
+               params.m_use_color4 = (use_color4 != 0);
+               params.m_constrain_against_base_color5 = false;
+
+               if ((!use_color4) && (subblock))
+               {
+                  params.m_constrain_against_base_color5 = true;
+                  params.m_base_color5 = results[0].m_block_color_unscaled;
+               }
+                              
+               if (params.m_quality == cHighQuality)
+               {
+                  static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
+                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4);
+                  params.m_pScan_deltas = s_scan_delta_0_to_4;
+               }
+               else if (params.m_quality == cMediumQuality)
+               {
+                  static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
+                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1);
+                  params.m_pScan_deltas = s_scan_delta_0_to_1;
+               }
+               else
+               {
+                  static const int s_scan_delta_0[] = { 0 };
+                  params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0);
+                  params.m_pScan_deltas = s_scan_delta_0;
+               }
+               
+               optimizer.init(params, results[subblock]);
+               if (!optimizer.compute())
+                  break;
+                              
+               if (params.m_quality >= cMediumQuality)
+               {
+                  // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions.
+                  const uint refinement_error_thresh0 = 3000;
+                  const uint refinement_error_thresh1 = 6000;
+                  if (results[subblock].m_error > refinement_error_thresh0)
+                  {
+                     if (params.m_quality == cMediumQuality)
+                     {
+                        static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 };
+                        params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3);
+                        params.m_pScan_deltas = s_scan_delta_2_to_3;
+                     }
+                     else
+                     {
+                        static const int s_scan_delta_5_to_5[] = { -5, 5 };
+                        static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 };
+                        if (results[subblock].m_error > refinement_error_thresh1)
+                        {
+                           params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8);
+                           params.m_pScan_deltas = s_scan_delta_5_to_8;
+                        }
+                        else
+                        {
+                           params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5);
+                           params.m_pScan_deltas = s_scan_delta_5_to_5;
+                        }
+                     }
+
+                     if (!optimizer.compute())
+                        break;
+                  }
+
+                  if (results[2].m_error < results[subblock].m_error)
+                     results[subblock] = results[2];
+               }
+                            
+               trial_error += results[subblock].m_error;
+               if (trial_error >= best_error)
+                  break;
+            }
+
+            if (subblock < 2)
+               continue;
+
+            best_error = trial_error;
+            best_results[0] = results[0];
+            best_results[1] = results[1];
+            best_flip = flip;
+            best_use_color4 = use_color4;
+            
+         } // use_color4
+
+      } // flip
+
+      int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r;
+      int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g;
+      int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b;
+      RG_ETC1_ASSERT(best_use_color4 || ((rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)));
+           
+      if (best_use_color4)
+      {
+         dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4));
+         dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4));
+         dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4));
+      }
+      else
+      {
+         if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr);
+         if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg);
+         if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db);
+      }
+      
+      dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip );
+      
+      uint selector0 = 0, selector1 = 0;
+      if (best_flip)
+      {
+         // flipped:
+         // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },               
+         // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } 
+         //
+         // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
+         // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
+         const uint8* pSelectors0 = best_results[0].m_pSelectors;
+         const uint8* pSelectors1 = best_results[1].m_pSelectors;
+         for (int x = 3; x >= 0; --x)
+         {
+            uint b;
+            b = g_selector_index_to_etc1[pSelectors1[4 + x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+            b = g_selector_index_to_etc1[pSelectors1[x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+            b = g_selector_index_to_etc1[pSelectors0[4 + x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+            b = g_selector_index_to_etc1[pSelectors0[x]];
+            selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+         }
+      }
+      else
+      {
+         // non-flipped:
+         // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
+         // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
+         //
+         // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
+         // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
+         for (int subblock = 1; subblock >= 0; --subblock)
+         {
+            const uint8* pSelectors = best_results[subblock].m_pSelectors + 4;
+            for (uint i = 0; i < 2; i++)
+            {
+               uint b;
+               b = g_selector_index_to_etc1[pSelectors[3]];
+               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+               b = g_selector_index_to_etc1[pSelectors[2]];
+               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+               b = g_selector_index_to_etc1[pSelectors[1]];
+               selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
+
+               b = g_selector_index_to_etc1[pSelectors[0]];
+               selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1);
+
+               pSelectors -= 4;
+            }
+         }
+      }
+                  
+      dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF);
+      dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF);
+
+      return static_cast<unsigned int>(best_error);
+   }
+
+} // namespace rg_etc1
diff --git a/drivers/etc1/rg_etc1.h b/drivers/etc1/rg_etc1.h
index 9a701506fd..9ce89a6cc6 100644
--- a/drivers/etc1/rg_etc1.h
+++ b/drivers/etc1/rg_etc1.h
@@ -1,76 +1,76 @@
-// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
-// Please see ZLIB license at the end of this file.
-#pragma once
-
-namespace rg_etc1
-{
-   // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels.
-   // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping.
-   // This function is thread safe, and does not dynamically allocate any memory.
-   // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255.
-   bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false);
-
-   // Quality setting = the higher the quality, the slower. 
-   // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality).
-   enum etc1_quality
-   { 
-      cLowQuality,
-      cMediumQuality,
-      cHighQuality,
-   };
-      
-   struct etc1_pack_params
-   {
-      etc1_quality m_quality;
-      bool m_dithering;
-                              
-      inline etc1_pack_params() 
-      {
-         clear();
-      }
-
-      void clear()
-      {
-         m_quality = cHighQuality;
-         m_dithering = false;
-      }
-   };
-
-   // Important: pack_etc1_block_init() must be called before calling pack_etc1_block().
-   void pack_etc1_block_init();
-
-   // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block.
-   // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255.
-   // Returns squared error of result.
-   // This function is thread safe, and does not dynamically allocate any memory.
-   // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE.
-   unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params);
-            
-} // namespace rg_etc1
-
-//------------------------------------------------------------------------------
-//
-// rg_etc1 uses the ZLIB license:
-// http://opensource.org/licenses/Zlib
-//
-// Copyright (c) 2012 Rich Geldreich
-//
-// This software is provided 'as-is', without any express or implied
-// warranty.  In no event will the authors be held liable for any damages
-// arising from the use of this software.
-//
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-//
-// 1. The origin of this software must not be misrepresented; you must not
-// claim that you wrote the original software. If you use this software
-// in a product, an acknowledgment in the product documentation would be
-// appreciated but is not required.
-//
-// 2. Altered source versions must be plainly marked as such, and must not be
-// misrepresented as being the original software.
-//
-// 3. This notice may not be removed or altered from any source distribution.
-//
-//------------------------------------------------------------------------------
+// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
+// Please see ZLIB license at the end of this file.
+#pragma once
+
+namespace rg_etc1
+{
+   // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels.
+   // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping.
+   // This function is thread safe, and does not dynamically allocate any memory.
+   // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255.
+   bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false);
+
+   // Quality setting = the higher the quality, the slower. 
+   // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality).
+   enum etc1_quality
+   { 
+      cLowQuality,
+      cMediumQuality,
+      cHighQuality,
+   };
+      
+   struct etc1_pack_params
+   {
+      etc1_quality m_quality;
+      bool m_dithering;
+                              
+      inline etc1_pack_params() 
+      {
+         clear();
+      }
+
+      void clear()
+      {
+         m_quality = cHighQuality;
+         m_dithering = false;
+      }
+   };
+
+   // Important: pack_etc1_block_init() must be called before calling pack_etc1_block().
+   void pack_etc1_block_init();
+
+   // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block.
+   // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255.
+   // Returns squared error of result.
+   // This function is thread safe, and does not dynamically allocate any memory.
+   // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE.
+   unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params);
+            
+} // namespace rg_etc1
+
+//------------------------------------------------------------------------------
+//
+// rg_etc1 uses the ZLIB license:
+// http://opensource.org/licenses/Zlib
+//
+// Copyright (c) 2012 Rich Geldreich
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+// claim that you wrote the original software. If you use this software
+// in a product, an acknowledgment in the product documentation would be
+// appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must not be
+// misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source distribution.
+//
+//------------------------------------------------------------------------------
diff --git a/drivers/gles2/rasterizer_gles2.cpp b/drivers/gles2/rasterizer_gles2.cpp
index d3a5f3b5bc..3d75ed29f3 100644
--- a/drivers/gles2/rasterizer_gles2.cpp
+++ b/drivers/gles2/rasterizer_gles2.cpp
@@ -4145,7 +4145,7 @@ void RasterizerGLES2::begin_frame() {
 
 	//fragment_lighting=Globals::get_singleton()->get("rasterizer/use_fragment_lighting");
 #ifdef TOOLS_ENABLED
-	canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("rasterizer/use_pixel_snap",false));
+	canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("display/use_2d_pixel_snap",false));
 	shadow_filter=ShadowFilterTechnique(int(Globals::get_singleton()->get("rasterizer/shadow_filter")));
 #endif
 
@@ -4160,7 +4160,6 @@ void RasterizerGLES2::begin_frame() {
 	time_delta=time-last_time;
 	last_time=time;
 	frame++;
-	clear_viewport(Color(1,0,0.5));
 
 	_rinfo.vertex_count=0;
 	_rinfo.object_count=0;
@@ -5970,6 +5969,10 @@ void RasterizerGLES2::_render(const Geometry *p_geometry,const Material *p_mater
 			if (element_count==0)
 				return;
 
+			if (mm->visible>=0) {
+				element_count=MIN(element_count,mm->visible);
+			}
+
 			const MultiMesh::Element *elements=&mm->elements[0];
 
 			_rinfo.vertex_count+=s->array_len*element_count;
@@ -10804,7 +10807,7 @@ void RasterizerGLES2::init() {
 	copy_shader.set_conditional(CopyShaderGLES2::USE_8BIT_HDR,!use_fp16_fb);
 	canvas_shader.set_conditional(CanvasShaderGLES2::USE_DEPTH_SHADOWS,read_depth_supported);
 
-	canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("rasterizer/use_pixel_snap",false));
+	canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("display/use_2d_pixel_snap",false));
 
 	npo2_textures_available=true;
 	//fragment_lighting=false;
@@ -11188,6 +11191,12 @@ RasterizerGLES2::RasterizerGLES2(bool p_compress_arrays,bool p_keep_ram_copy,boo
 	tc0_idx=0;
 };
 
+void RasterizerGLES2::restore_framebuffer() {
+
+	glBindFramebuffer(GL_FRAMEBUFFER, base_framebuffer);
+	
+}
+
 RasterizerGLES2::~RasterizerGLES2() {
 
 	memdelete_arr(skinned_buffer);
diff --git a/drivers/gles2/rasterizer_gles2.h b/drivers/gles2/rasterizer_gles2.h
index d337ecfb64..f759e84b53 100644
--- a/drivers/gles2/rasterizer_gles2.h
+++ b/drivers/gles2/rasterizer_gles2.h
@@ -1695,6 +1695,8 @@ public:
 	void reload_vram();
 
 	virtual bool has_feature(VS::Features p_feature) const;
+	
+	virtual void restore_framebuffer();
 
 	static RasterizerGLES2* get_singleton();
 
diff --git a/drivers/gles2/shader_compiler_gles2.cpp b/drivers/gles2/shader_compiler_gles2.cpp
index 157f2e398b..d57512c936 100644
--- a/drivers/gles2/shader_compiler_gles2.cpp
+++ b/drivers/gles2/shader_compiler_gles2.cpp
@@ -132,18 +132,18 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a
 			SL::BlockNode *bnode=(SL::BlockNode*)p_node;
 
 			//variables
-			code+="{"ENDL;
+            code+="{" ENDL;
 			for(Map<StringName,SL::DataType>::Element *E=bnode->variables.front();E;E=E->next()) {
 
-				code+=_mktab(p_level)+_typestr(E->value())+" "+replace_string(E->key())+";"ENDL;
+                code+=_mktab(p_level)+_typestr(E->value())+" "+replace_string(E->key())+";" ENDL;
 			}
 
 			for(int i=0;i<bnode->statements.size();i++) {
 
-				code+=_mktab(p_level)+dump_node_code(bnode->statements[i],p_level)+";"ENDL;
+                code+=_mktab(p_level)+dump_node_code(bnode->statements[i],p_level)+";" ENDL;
 			}
 
-			code+="}"ENDL;
+            code+="}" ENDL;
 
 		} break;
 		case SL::Node::TYPE_VARIABLE: {
@@ -489,15 +489,15 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a
 			SL::ControlFlowNode *cfnode=(SL::ControlFlowNode*)p_node;
 			if (cfnode->flow_op==SL::FLOW_OP_IF) {
 
-				code+="if ("+dump_node_code(cfnode->statements[0],p_level)+") {"ENDL;
+                code+="if ("+dump_node_code(cfnode->statements[0],p_level)+") {" ENDL;
 				code+=dump_node_code(cfnode->statements[1],p_level+1);
 				if (cfnode->statements.size()==3) {
 
-					code+="} else {"ENDL;
+                    code+="} else {" ENDL;
 					code+=dump_node_code(cfnode->statements[2],p_level+1);
 				}
 
-				code+="}"ENDL;
+                code+="}" ENDL;
 
 			} else if (cfnode->flow_op==SL::FLOW_OP_RETURN) {
 
@@ -560,7 +560,7 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) {
 		ubase=uniforms->size();
 	for(Map<StringName,SL::Uniform>::Element *E=p_program->uniforms.front();E;E=E->next()) {
 
-		String uline="uniform "+_typestr(E->get().type)+" _"+E->key().operator String()+";"ENDL;
+        String uline="uniform "+_typestr(E->get().type)+" _"+E->key().operator String()+";" ENDL;
 
 		global_code+=uline;
 		if (uniforms) {
@@ -593,10 +593,10 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) {
 			header+=_typestr(fnode->arguments[i].type)+" "+replace_string(fnode->arguments[i].name);
 		}
 
-		header+=") {"ENDL;
+        header+=") {" ENDL;
 		String fcode=header;
 		fcode+=dump_node_code(fnode->body,1);
-		fcode+="}"ENDL;
+        fcode+="}" ENDL;
 		global_code+=fcode;
 
 	}
@@ -605,7 +605,7 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) {
 
 		StringName varname=E->key();
 		String newvarname=replace_string(varname);
-		global_code+="uniform "+_typestr(E->get())+" "+newvarname+";"ENDL;
+        global_code+="uniform "+_typestr(E->get())+" "+newvarname+";" ENDL;
 	}*/
 
 	code=dump_node_code(p_program,0);
diff --git a/drivers/mpc/audio_stream_mpc.cpp b/drivers/mpc/audio_stream_mpc.cpp
index 67f21f922c..fe6aa05d00 100644
--- a/drivers/mpc/audio_stream_mpc.cpp
+++ b/drivers/mpc/audio_stream_mpc.cpp
@@ -1,7 +1,7 @@
 #include "audio_stream_mpc.h"
 
 
-Error AudioStreamMPC::_open_file() {
+Error AudioStreamPlaybackMPC::_open_file() {
 
 	if (f) {
 		memdelete(f);
@@ -41,7 +41,7 @@ Error AudioStreamMPC::_open_file() {
 	return OK;
 }
 
-void AudioStreamMPC::_close_file() {
+void AudioStreamPlaybackMPC::_close_file() {
 
 	if (f) {
 		memdelete(f);
@@ -52,7 +52,7 @@ void AudioStreamMPC::_close_file() {
 	data_ofs=0;
 }
 
-int AudioStreamMPC::_read_file(void *p_dst,int p_bytes) {
+int AudioStreamPlaybackMPC::_read_file(void *p_dst,int p_bytes) {
 
 	if (f)
 		return f->get_buffer((uint8_t*)p_dst,p_bytes);
@@ -68,7 +68,7 @@ int AudioStreamMPC::_read_file(void *p_dst,int p_bytes) {
 	return p_bytes;
 }
 
-bool AudioStreamMPC::_seek_file(int p_pos){
+bool AudioStreamPlaybackMPC::_seek_file(int p_pos){
 
 	if (p_pos<0 || p_pos>streamlen)
 		return false;
@@ -83,7 +83,7 @@ bool AudioStreamMPC::_seek_file(int p_pos){
 	return true;
 
 }
-int AudioStreamMPC::_tell_file()  const{
+int AudioStreamPlaybackMPC::_tell_file()  const{
 
 	if (f)
 		return f->get_pos();
@@ -93,13 +93,13 @@ int AudioStreamMPC::_tell_file()  const{
 
 }
 
-int AudioStreamMPC::_sizeof_file() const{
+int AudioStreamPlaybackMPC::_sizeof_file() const{
 
 	//print_line("sizeof file, get: "+itos(streamlen));
 	return streamlen;
 }
 
-bool AudioStreamMPC::_canseek_file() const{
+bool AudioStreamPlaybackMPC::_canseek_file() const{
 
 	//print_line("canseek file, get true");
 	return true;
@@ -107,51 +107,46 @@ bool AudioStreamMPC::_canseek_file() const{
 
 /////////////////////
 
-mpc_int32_t AudioStreamMPC::_mpc_read(mpc_reader *p_reader,void *p_dst, mpc_int32_t p_bytes) {
+mpc_int32_t AudioStreamPlaybackMPC::_mpc_read(mpc_reader *p_reader,void *p_dst, mpc_int32_t p_bytes) {
 
-	AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+	AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
 	return smpc->_read_file(p_dst,p_bytes);
 }
 
-mpc_bool_t AudioStreamMPC::_mpc_seek(mpc_reader *p_reader,mpc_int32_t p_offset) {
+mpc_bool_t AudioStreamPlaybackMPC::_mpc_seek(mpc_reader *p_reader,mpc_int32_t p_offset) {
 
-	AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+	AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
 	return smpc->_seek_file(p_offset);
 
 }
-mpc_int32_t AudioStreamMPC::_mpc_tell(mpc_reader *p_reader) {
+mpc_int32_t AudioStreamPlaybackMPC::_mpc_tell(mpc_reader *p_reader) {
 
-	AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+	AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
 	return smpc->_tell_file();
 
 }
-mpc_int32_t AudioStreamMPC::_mpc_get_size(mpc_reader *p_reader) {
+mpc_int32_t AudioStreamPlaybackMPC::_mpc_get_size(mpc_reader *p_reader) {
 
-	AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+	AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
 	return smpc->_sizeof_file();
 
 
 }
-mpc_bool_t AudioStreamMPC::_mpc_canseek(mpc_reader *p_reader) {
+mpc_bool_t AudioStreamPlaybackMPC::_mpc_canseek(mpc_reader *p_reader) {
 
-	AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data;
+	AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data;
 	return smpc->_canseek_file();
 }
 
 
 
-bool AudioStreamMPC::_can_mix() const {
 
-	return /*active &&*/ !paused;
-}
-
-
-void AudioStreamMPC::update() {
+int AudioStreamPlaybackMPC::mix(int16_t* p_bufer,int p_frames) {
 
 	if (!active || paused)
-		return;
+		return 0;
 
-	int todo=get_todo();
+	int todo=p_frames;
 
 	while(todo>MPC_DECODER_BUFFER_LENGTH/si.channels) {
 
@@ -162,7 +157,7 @@ void AudioStreamMPC::update() {
 		mpc_status err = mpc_demux_decode(demux, &frame);
 		if (frame.bits!=-1) {
 
-			int16_t *dst_buff = get_write_buffer();
+			int16_t *dst_buff = p_bufer;
 
 #ifdef MPC_FIXED_POINT
 
@@ -185,21 +180,21 @@ void AudioStreamMPC::update() {
 #endif
 
 			int frames = frame.samples;
-			write(frames);
+			p_bufer+=si.channels*frames;
 			todo-=frames;
 		} else {
 
 			if (err != MPC_STATUS_OK) {
 
 				stop();
-				ERR_EXPLAIN("Error decoding MPC");
-				ERR_FAIL();
+				ERR_PRINT("Error decoding MPC");
+				break;
 			} else {
 
 				//finished
 				if (!loop) {
 					stop();
-					return;
+					break;
 				} else {
 
 
@@ -213,9 +208,11 @@ void AudioStreamMPC::update() {
 			}
 		}
 	}
+
+	return p_frames-todo;
 }
 
-Error AudioStreamMPC::_reload() {
+Error AudioStreamPlaybackMPC::_reload() {
 
 	ERR_FAIL_COND_V(demux!=NULL, ERR_FILE_ALREADY_IN_USE);
 
@@ -224,31 +221,40 @@ Error AudioStreamMPC::_reload() {
 
 	demux = mpc_demux_init(&reader);
 	ERR_FAIL_COND_V(!demux,ERR_CANT_CREATE);
-
 	mpc_demux_get_info(demux,  &si);
-	_setup(si.channels,si.sample_freq,MPC_DECODER_BUFFER_LENGTH*2/si.channels);
 
 	return OK;
 }
 
-void AudioStreamMPC::set_file(const String& p_file) {
+void AudioStreamPlaybackMPC::set_file(const String& p_file) {
 
 	file=p_file;
 
+	Error err = _open_file();
+	ERR_FAIL_COND(err!=OK);
+	demux = mpc_demux_init(&reader);
+	ERR_FAIL_COND(!demux);
+	mpc_demux_get_info(demux,  &si);
+	stream_min_size=MPC_DECODER_BUFFER_LENGTH*2/si.channels;
+	stream_rate=si.sample_freq;
+	stream_channels=si.channels;
+
+	mpc_demux_exit(demux);
+	demux=NULL;
+	_close_file();
+
 }
 
 
-String AudioStreamMPC::get_file() const {
+String AudioStreamPlaybackMPC::get_file() const {
 
 	return file;
 }
 
 
-void AudioStreamMPC::play() {
+void AudioStreamPlaybackMPC::play(float p_offset) {
 
 
-	_THREAD_SAFE_METHOD_
-
 	if (active)
 		stop();
 	active=false;
@@ -262,9 +268,9 @@ void AudioStreamMPC::play() {
 
 }
 
-void AudioStreamMPC::stop()  {
+void AudioStreamPlaybackMPC::stop()  {
+
 
-	_THREAD_SAFE_METHOD_
 	if (!active)
 		return;
 	if (demux) {
@@ -275,70 +281,58 @@ void AudioStreamMPC::stop()  {
 	active=false;
 
 }
-bool AudioStreamMPC::is_playing() const  {
+bool AudioStreamPlaybackMPC::is_playing() const  {
 
-	return active || (get_total() - get_todo() -1 > 0);
+	return active;
 }
 
-void AudioStreamMPC::set_paused(bool p_paused)  {
 
-	paused=p_paused;
-}
-bool AudioStreamMPC::is_paused(bool p_paused) const  {
-
-	return paused;
-}
-
-void AudioStreamMPC::set_loop(bool p_enable)  {
+void AudioStreamPlaybackMPC::set_loop(bool p_enable)  {
 
 	loop=p_enable;
 }
-bool AudioStreamMPC::has_loop() const  {
+bool AudioStreamPlaybackMPC::has_loop() const  {
 
 	return loop;
 }
 
-float AudioStreamMPC::get_length() const {
+float AudioStreamPlaybackMPC::get_length() const {
 
 	return 0;
 }
 
-String AudioStreamMPC::get_stream_name() const {
+String AudioStreamPlaybackMPC::get_stream_name() const {
 
 	return "";
 }
 
-int AudioStreamMPC::get_loop_count() const {
+int AudioStreamPlaybackMPC::get_loop_count() const {
 
 	return 0;
 }
 
-float AudioStreamMPC::get_pos() const {
+float AudioStreamPlaybackMPC::get_pos() const {
 
 	return 0;
 }
-void AudioStreamMPC::seek_pos(float p_time) {
+void AudioStreamPlaybackMPC::seek_pos(float p_time) {
 
 
 }
 
-AudioStream::UpdateMode AudioStreamMPC::get_update_mode() const {
-
-	return UPDATE_THREAD;
-}
 
-void AudioStreamMPC::_bind_methods() {
+void AudioStreamPlaybackMPC::_bind_methods() {
 
-	ObjectTypeDB::bind_method(_MD("set_file","name"),&AudioStreamMPC::set_file);
-	ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamMPC::get_file);
+	ObjectTypeDB::bind_method(_MD("set_file","name"),&AudioStreamPlaybackMPC::set_file);
+	ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamPlaybackMPC::get_file);
 
 	ADD_PROPERTYNZ( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"mpc"), _SCS("set_file"), _SCS("get_file"));
 
 }
 
-AudioStreamMPC::AudioStreamMPC() {
+AudioStreamPlaybackMPC::AudioStreamPlaybackMPC() {
 
-	preload=true;
+	preload=false;
 	f=NULL;
 	streamlen=0;
 	data_ofs=0;
@@ -356,7 +350,7 @@ AudioStreamMPC::AudioStreamMPC() {
 
 }
 
-AudioStreamMPC::~AudioStreamMPC() {
+AudioStreamPlaybackMPC::~AudioStreamPlaybackMPC() {
 
 	stop();
 
diff --git a/drivers/mpc/audio_stream_mpc.h b/drivers/mpc/audio_stream_mpc.h
index 8fb0ed13de..122d0d0bbb 100644
--- a/drivers/mpc/audio_stream_mpc.h
+++ b/drivers/mpc/audio_stream_mpc.h
@@ -1,18 +1,17 @@
 #ifndef AUDIO_STREAM_MPC_H
 #define AUDIO_STREAM_MPC_H
 
-#include "scene/resources/audio_stream_resampled.h"
+#include "scene/resources/audio_stream.h"
 #include "os/file_access.h"
 #include "mpc/mpcdec.h"
 #include "os/thread_safe.h"
 #include "io/resource_loader.h"
 //#include "../libmpcdec/decoder.h"
 //#include "../libmpcdec/internal.h"
-class AudioStreamMPC : public AudioStreamResampled {
 
-	OBJ_TYPE( AudioStreamMPC, AudioStreamResampled );
+class AudioStreamPlaybackMPC : public AudioStreamPlayback {
 
-	_THREAD_SAFE_CLASS_
+	OBJ_TYPE( AudioStreamPlaybackMPC, AudioStreamPlayback );
 
 	bool preload;
 	FileAccess *f;
@@ -39,7 +38,9 @@ class AudioStreamMPC : public AudioStreamResampled {
 	static mpc_int32_t _mpc_get_size(mpc_reader *p_reader);
 	static mpc_bool_t _mpc_canseek(mpc_reader *p_reader);
 
-	virtual bool _can_mix() const ;
+	int stream_min_size;
+	int stream_rate;
+	int stream_channels;
 
 protected:
 	Error _open_file();
@@ -59,12 +60,10 @@ public:
 	void set_file(const String& p_file);
 	String get_file() const;
 
-	virtual void play();
+	virtual void play(float p_offset=0);
 	virtual void stop();
 	virtual bool is_playing() const;
 
-	virtual void set_paused(bool p_paused);
-	virtual bool is_paused(bool p_paused) const;
 
 	virtual void set_loop(bool p_enable);
 	virtual bool has_loop() const;
@@ -78,13 +77,35 @@ public:
 	virtual float get_pos() const;
 	virtual void seek_pos(float p_time);
 
-	virtual UpdateMode get_update_mode() const;
-	virtual void update();
+	virtual int get_channels() const { return stream_channels; }
+	virtual int get_mix_rate() const { return stream_rate; }
 
-	AudioStreamMPC();
-	~AudioStreamMPC();
+	virtual int get_minimum_buffer_size() const { return stream_min_size; }
+	virtual int mix(int16_t* p_bufer,int p_frames);
+
+	virtual void set_loop_restart_time(float p_time) {  }
+
+	AudioStreamPlaybackMPC();
+	~AudioStreamPlaybackMPC();
 };
 
+class AudioStreamMPC : public AudioStream {
+
+	OBJ_TYPE( AudioStreamMPC, AudioStream );
+
+	String file;
+public:
+
+	Ref<AudioStreamPlayback> instance_playback() {
+		Ref<AudioStreamPlaybackMPC> pb = memnew( AudioStreamPlaybackMPC );
+		pb->set_file(file);
+		return pb;
+	}
+
+	void set_file(const String& p_file) { file=p_file; }
+
+
+};
 
 class ResourceFormatLoaderAudioStreamMPC : public ResourceFormatLoader {
 public:
diff --git a/drivers/nedmalloc/malloc.c.h b/drivers/nedmalloc/malloc.c.h
index b9e65637d5..4fec5cc9d4 100644
--- a/drivers/nedmalloc/malloc.c.h
+++ b/drivers/nedmalloc/malloc.c.h
@@ -1,5814 +1,5814 @@
-#ifdef NEDMALLOC_ENABLED
-/*
-  This is a version (aka dlmalloc) of malloc/free/realloc written by
-  Doug Lea and released to the public domain, as explained at
-  http://creativecommons.org/licenses/publicdomain.  Send questions,
-  comments, complaints, performance data, etc to dl@cs.oswego.edu
-
-* Version 2.8.4 Wed May 27 09:56:23 2009  Doug Lea  (dl at gee)
-
-   Note: There may be an updated version of this malloc obtainable at
-           ftp://gee.cs.oswego.edu/pub/misc/malloc.c
-         Check before installing!
-
-* Quickstart
-
-  This library is all in one file to simplify the most common usage:
-  ftp it, compile it (-O3), and link it into another program. All of
-  the compile-time options default to reasonable values for use on
-  most platforms.  You might later want to step through various
-  compile-time and dynamic tuning options.
-
-  For convenience, an include file for code using this malloc is at:
-     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h
-  You don't really need this .h file unless you call functions not
-  defined in your system include files.  The .h file contains only the
-  excerpts from this file needed for using this malloc on ANSI C/C++
-  systems, so long as you haven't changed compile-time options about
-  naming and tuning parameters.  If you do, then you can create your
-  own malloc.h that does include all settings by cutting at the point
-  indicated below. Note that you may already by default be using a C
-  library containing a malloc that is based on some version of this
-  malloc (for example in linux). You might still want to use the one
-  in this file to customize settings or to avoid overheads associated
-  with library versions.
-
-* Vital statistics:
-
-  Supported pointer/size_t representation:       4 or 8 bytes
-       size_t MUST be an unsigned type of the same width as
-       pointers. (If you are using an ancient system that declares
-       size_t as a signed type, or need it to be a different width
-       than pointers, you can use a previous release of this malloc
-       (e.g. 2.7.2) supporting these.)
-
-  Alignment:                                     8 bytes (default)
-       This suffices for nearly all current machines and C compilers.
-       However, you can define MALLOC_ALIGNMENT to be wider than this
-       if necessary (up to 128bytes), at the expense of using more space.
-
-  Minimum overhead per allocated chunk:   4 or  8 bytes (if 4byte sizes)
-                                          8 or 16 bytes (if 8byte sizes)
-       Each malloced chunk has a hidden word of overhead holding size
-       and status information, and additional cross-check word
-       if FOOTERS is defined.
-
-  Minimum allocated size: 4-byte ptrs:  16 bytes    (including overhead)
-                          8-byte ptrs:  32 bytes    (including overhead)
-
-       Even a request for zero bytes (i.e., malloc(0)) returns a
-       pointer to something of the minimum allocatable size.
-       The maximum overhead wastage (i.e., number of extra bytes
-       allocated than were requested in malloc) is less than or equal
-       to the minimum size, except for requests >= mmap_threshold that
-       are serviced via mmap(), where the worst case wastage is about
-       32 bytes plus the remainder from a system page (the minimal
-       mmap unit); typically 4096 or 8192 bytes.
-
-  Security: static-safe; optionally more or less
-       The "security" of malloc refers to the ability of malicious
-       code to accentuate the effects of errors (for example, freeing
-       space that is not currently malloc'ed or overwriting past the
-       ends of chunks) in code that calls malloc.  This malloc
-       guarantees not to modify any memory locations below the base of
-       heap, i.e., static variables, even in the presence of usage
-       errors.  The routines additionally detect most improper frees
-       and reallocs.  All this holds as long as the static bookkeeping
-       for malloc itself is not corrupted by some other means.  This
-       is only one aspect of security -- these checks do not, and
-       cannot, detect all possible programming errors.
-
-       If FOOTERS is defined nonzero, then each allocated chunk
-       carries an additional check word to verify that it was malloced
-       from its space.  These check words are the same within each
-       execution of a program using malloc, but differ across
-       executions, so externally crafted fake chunks cannot be
-       freed. This improves security by rejecting frees/reallocs that
-       could corrupt heap memory, in addition to the checks preventing
-       writes to statics that are always on.  This may further improve
-       security at the expense of time and space overhead.  (Note that
-       FOOTERS may also be worth using with MSPACES.)
-
-       By default detected errors cause the program to abort (calling
-       "abort()"). You can override this to instead proceed past
-       errors by defining PROCEED_ON_ERROR.  In this case, a bad free
-       has no effect, and a malloc that encounters a bad address
-       caused by user overwrites will ignore the bad address by
-       dropping pointers and indices to all known memory. This may
-       be appropriate for programs that should continue if at all
-       possible in the face of programming errors, although they may
-       run out of memory because dropped memory is never reclaimed.
-
-       If you don't like either of these options, you can define
-       CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
-       else. And if if you are sure that your program using malloc has
-       no errors or vulnerabilities, you can define INSECURE to 1,
-       which might (or might not) provide a small performance improvement.
-
-  Thread-safety: NOT thread-safe unless USE_LOCKS defined
-       When USE_LOCKS is defined, each public call to malloc, free,
-       etc is surrounded with either a pthread mutex or a win32
-       spinlock (depending on WIN32). This is not especially fast, and
-       can be a major bottleneck.  It is designed only to provide
-       minimal protection in concurrent environments, and to provide a
-       basis for extensions.  If you are using malloc in a concurrent
-       program, consider instead using nedmalloc
-       (http://www.nedprod.com/programs/portable/nedmalloc/) or
-       ptmalloc (See http://www.malloc.de), which are derived
-       from versions of this malloc.
-
-  System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
-       This malloc can use unix sbrk or any emulation (invoked using
-       the CALL_MORECORE macro) and/or mmap/munmap or any emulation
-       (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
-       memory.  On most unix systems, it tends to work best if both
-       MORECORE and MMAP are enabled.  On Win32, it uses emulations
-       based on VirtualAlloc. It also uses common C library functions
-       like memset.
-
-  Compliance: I believe it is compliant with the Single Unix Specification
-       (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
-       others as well.
-
-* Overview of algorithms
-
-  This is not the fastest, most space-conserving, most portable, or
-  most tunable malloc ever written. However it is among the fastest
-  while also being among the most space-conserving, portable and
-  tunable.  Consistent balance across these factors results in a good
-  general-purpose allocator for malloc-intensive programs.
-
-  In most ways, this malloc is a best-fit allocator. Generally, it
-  chooses the best-fitting existing chunk for a request, with ties
-  broken in approximately least-recently-used order. (This strategy
-  normally maintains low fragmentation.) However, for requests less
-  than 256bytes, it deviates from best-fit when there is not an
-  exactly fitting available chunk by preferring to use space adjacent
-  to that used for the previous small request, as well as by breaking
-  ties in approximately most-recently-used order. (These enhance
-  locality of series of small allocations.)  And for very large requests
-  (>= 256Kb by default), it relies on system memory mapping
-  facilities, if supported.  (This helps avoid carrying around and
-  possibly fragmenting memory used only for large chunks.)
-
-  All operations (except malloc_stats and mallinfo) have execution
-  times that are bounded by a constant factor of the number of bits in
-  a size_t, not counting any clearing in calloc or copying in realloc,
-  or actions surrounding MORECORE and MMAP that have times
-  proportional to the number of non-contiguous regions returned by
-  system allocation routines, which is often just 1. In real-time
-  applications, you can optionally suppress segment traversals using
-  NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
-  system allocators return non-contiguous spaces, at the typical
-  expense of carrying around more memory and increased fragmentation.
-
-  The implementation is not very modular and seriously overuses
-  macros. Perhaps someday all C compilers will do as good a job
-  inlining modular code as can now be done by brute-force expansion,
-  but now, enough of them seem not to.
-
-  Some compilers issue a lot of warnings about code that is
-  dead/unreachable only on some platforms, and also about intentional
-  uses of negation on unsigned types. All known cases of each can be
-  ignored.
-
-  For a longer but out of date high-level description, see
-     http://gee.cs.oswego.edu/dl/html/malloc.html
-
-* MSPACES
-  If MSPACES is defined, then in addition to malloc, free, etc.,
-  this file also defines mspace_malloc, mspace_free, etc. These
-  are versions of malloc routines that take an "mspace" argument
-  obtained using create_mspace, to control all internal bookkeeping.
-  If ONLY_MSPACES is defined, only these versions are compiled.
-  So if you would like to use this allocator for only some allocations,
-  and your system malloc for others, you can compile with
-  ONLY_MSPACES and then do something like...
-    static mspace mymspace = create_mspace(0,0); // for example
-    #define mymalloc(bytes)  mspace_malloc(mymspace, bytes)
-
-  (Note: If you only need one instance of an mspace, you can instead
-  use "USE_DL_PREFIX" to relabel the global malloc.)
-
-  You can similarly create thread-local allocators by storing
-  mspaces as thread-locals. For example:
-    static __thread mspace tlms = 0;
-    void*  tlmalloc(size_t bytes) {
-      if (tlms == 0) tlms = create_mspace(0, 0);
-      return mspace_malloc(tlms, bytes);
-    }
-    void  tlfree(void* mem) { mspace_free(tlms, mem); }
-
-  Unless FOOTERS is defined, each mspace is completely independent.
-  You cannot allocate from one and free to another (although
-  conformance is only weakly checked, so usage errors are not always
-  caught). If FOOTERS is defined, then each chunk carries around a tag
-  indicating its originating mspace, and frees are directed to their
-  originating spaces.
-
- -------------------------  Compile-time options ---------------------------
-
-Be careful in setting #define values for numerical constants of type
-size_t. On some systems, literal values are not automatically extended
-to size_t precision unless they are explicitly casted. You can also
-use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
-
-WIN32                    default: defined if _WIN32 defined
-  Defining WIN32 sets up defaults for MS environment and compilers.
-  Otherwise defaults are for unix. Beware that there seem to be some
-  cases where this malloc might not be a pure drop-in replacement for
-  Win32 malloc: Random-looking failures from Win32 GDI API's (eg;
-  SetDIBits()) may be due to bugs in some video driver implementations
-  when pixel buffers are malloc()ed, and the region spans more than
-  one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
-  default granularity, pixel buffers may straddle virtual allocation
-  regions more often than when using the Microsoft allocator.  You can
-  avoid this by using VirtualAlloc() and VirtualFree() for all pixel
-  buffers rather than using malloc().  If this is not possible,
-  recompile this malloc with a larger DEFAULT_GRANULARITY.
-
-MALLOC_ALIGNMENT         default: (size_t)8
-  Controls the minimum alignment for malloc'ed chunks.  It must be a
-  power of two and at least 8, even on machines for which smaller
-  alignments would suffice. It may be defined as larger than this
-  though. Note however that code and data structures are optimized for
-  the case of 8-byte alignment.
-
-MSPACES                  default: 0 (false)
-  If true, compile in support for independent allocation spaces.
-  This is only supported if HAVE_MMAP is true.
-
-ONLY_MSPACES             default: 0 (false)
-  If true, only compile in mspace versions, not regular versions.
-
-USE_LOCKS                default: 0 (false)
-  Causes each call to each public routine to be surrounded with
-  pthread or WIN32 mutex lock/unlock. (If set true, this can be
-  overridden on a per-mspace basis for mspace versions.) If set to a
-  non-zero value other than 1, locks are used, but their
-  implementation is left out, so lock functions must be supplied manually,
-  as described below.
-
-USE_SPIN_LOCKS           default: 1 iff USE_LOCKS and on x86 using gcc or MSC
-  If true, uses custom spin locks for locking. This is currently
-  supported only for x86 platforms using gcc or recent MS compilers.
-  Otherwise, posix locks or win32 critical sections are used.
-
-FOOTERS                  default: 0
-  If true, provide extra checking and dispatching by placing
-  information in the footers of allocated chunks. This adds
-  space and time overhead.
-
-INSECURE                 default: 0
-  If true, omit checks for usage errors and heap space overwrites.
-
-USE_DL_PREFIX            default: NOT defined
-  Causes compiler to prefix all public routines with the string 'dl'.
-  This can be useful when you only want to use this malloc in one part
-  of a program, using your regular system malloc elsewhere.
-
-ABORT                    default: defined as abort()
-  Defines how to abort on failed checks.  On most systems, a failed
-  check cannot die with an "assert" or even print an informative
-  message, because the underlying print routines in turn call malloc,
-  which will fail again.  Generally, the best policy is to simply call
-  abort(). It's not very useful to do more than this because many
-  errors due to overwriting will show up as address faults (null, odd
-  addresses etc) rather than malloc-triggered checks, so will also
-  abort.  Also, most compilers know that abort() does not return, so
-  can better optimize code conditionally calling it.
-
-PROCEED_ON_ERROR           default: defined as 0 (false)
-  Controls whether detected bad addresses cause them to bypassed
-  rather than aborting. If set, detected bad arguments to free and
-  realloc are ignored. And all bookkeeping information is zeroed out
-  upon a detected overwrite of freed heap space, thus losing the
-  ability to ever return it from malloc again, but enabling the
-  application to proceed. If PROCEED_ON_ERROR is defined, the
-  static variable malloc_corruption_error_count is compiled in
-  and can be examined to see if errors have occurred. This option
-  generates slower code than the default abort policy.
-
-DEBUG                    default: NOT defined
-  The DEBUG setting is mainly intended for people trying to modify
-  this code or diagnose problems when porting to new platforms.
-  However, it may also be able to better isolate user errors than just
-  using runtime checks.  The assertions in the check routines spell
-  out in more detail the assumptions and invariants underlying the
-  algorithms.  The checking is fairly extensive, and will slow down
-  execution noticeably. Calling malloc_stats or mallinfo with DEBUG
-  set will attempt to check every non-mmapped allocated and free chunk
-  in the course of computing the summaries.
-
-ABORT_ON_ASSERT_FAILURE   default: defined as 1 (true)
-  Debugging assertion failures can be nearly impossible if your
-  version of the assert macro causes malloc to be called, which will
-  lead to a cascade of further failures, blowing the runtime stack.
-  ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(),
-  which will usually make debugging easier.
-
-MALLOC_FAILURE_ACTION     default: sets errno to ENOMEM, or no-op on win32
-  The action to take before "return 0" when malloc fails to be able to
-  return memory because there is none available.
-
-HAVE_MORECORE             default: 1 (true) unless win32 or ONLY_MSPACES
-  True if this system supports sbrk or an emulation of it.
-
-MORECORE                  default: sbrk
-  The name of the sbrk-style system routine to call to obtain more
-  memory.  See below for guidance on writing custom MORECORE
-  functions. The type of the argument to sbrk/MORECORE varies across
-  systems.  It cannot be size_t, because it supports negative
-  arguments, so it is normally the signed type of the same width as
-  size_t (sometimes declared as "intptr_t").  It doesn't much matter
-  though. Internally, we only call it with arguments less than half
-  the max value of a size_t, which should work across all reasonable
-  possibilities, although sometimes generating compiler warnings.
-
-MORECORE_CONTIGUOUS       default: 1 (true) if HAVE_MORECORE
-  If true, take advantage of fact that consecutive calls to MORECORE
-  with positive arguments always return contiguous increasing
-  addresses.  This is true of unix sbrk. It does not hurt too much to
-  set it true anyway, since malloc copes with non-contiguities.
-  Setting it false when definitely non-contiguous saves time
-  and possibly wasted space it would take to discover this though.
-
-MORECORE_CANNOT_TRIM      default: NOT defined
-  True if MORECORE cannot release space back to the system when given
-  negative arguments. This is generally necessary only if you are
-  using a hand-crafted MORECORE function that cannot handle negative
-  arguments.
-
-NO_SEGMENT_TRAVERSAL       default: 0
-  If non-zero, suppresses traversals of memory segments
-  returned by either MORECORE or CALL_MMAP. This disables
-  merging of segments that are contiguous, and selectively
-  releasing them to the OS if unused, but bounds execution times.
-
-HAVE_MMAP                 default: 1 (true)
-  True if this system supports mmap or an emulation of it.  If so, and
-  HAVE_MORECORE is not true, MMAP is used for all system
-  allocation. If set and HAVE_MORECORE is true as well, MMAP is
-  primarily used to directly allocate very large blocks. It is also
-  used as a backup strategy in cases where MORECORE fails to provide
-  space from system. Note: A single call to MUNMAP is assumed to be
-  able to unmap memory that may have be allocated using multiple calls
-  to MMAP, so long as they are adjacent.
-
-HAVE_MREMAP               default: 1 on linux, else 0
-  If true realloc() uses mremap() to re-allocate large blocks and
-  extend or shrink allocation spaces.
-
-MMAP_CLEARS               default: 1 except on WINCE.
-  True if mmap clears memory so calloc doesn't need to. This is true
-  for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
-
-USE_BUILTIN_FFS            default: 0 (i.e., not used)
-  Causes malloc to use the builtin ffs() function to compute indices.
-  Some compilers may recognize and intrinsify ffs to be faster than the
-  supplied C version. Also, the case of x86 using gcc is special-cased
-  to an asm instruction, so is already as fast as it can be, and so
-  this setting has no effect. Similarly for Win32 under recent MS compilers.
-  (On most x86s, the asm version is only slightly faster than the C version.)
-
-malloc_getpagesize         default: derive from system includes, or 4096.
-  The system page size. To the extent possible, this malloc manages
-  memory from the system in page-size units.  This may be (and
-  usually is) a function rather than a constant. This is ignored
-  if WIN32, where page size is determined using getSystemInfo during
-  initialization. This may be several megabytes if ENABLE_LARGE_PAGES
-  is enabled.
-
-ENABLE_LARGE_PAGES         default: NOT defined
-  Causes the system page size to be the value of GetLargePageMinimum()
-  if that function is available (Windows Server 2003/Vista or later).
-  This allows the use of large page entries in the MMU which can
-  significantly improve performance in large working set applications
-  as TLB cache load is reduced by a factor of three. Note that enabling
-  this option is equal to locking the process' memory in current
-  implementations of Windows and requires the SE_LOCK_MEMORY_PRIVILEGE
-  to be held by the process in order to succeed.
-
-USE_DEV_RANDOM             default: 0 (i.e., not used)
-  Causes malloc to use /dev/random to initialize secure magic seed for
-  stamping footers. Otherwise, the current time is used.
-
-NO_MALLINFO                default: 0
-  If defined, don't compile "mallinfo". This can be a simple way
-  of dealing with mismatches between system declarations and
-  those in this file.
-
-MALLINFO_FIELD_TYPE        default: size_t
-  The type of the fields in the mallinfo struct. This was originally
-  defined as "int" in SVID etc, but is more usefully defined as
-  size_t. The value is used only if  HAVE_USR_INCLUDE_MALLOC_H is not set
-
-REALLOC_ZERO_BYTES_FREES    default: not defined
-  This should be set if a call to realloc with zero bytes should
-  be the same as a call to free. Some people think it should. Otherwise,
-  since this malloc returns a unique pointer for malloc(0), so does
-  realloc(p, 0).
-
-LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
-LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H,  LACKS_ERRNO_H
-LACKS_STDLIB_H                default: NOT defined unless on WIN32
-  Define these if your system does not have these header files.
-  You might need to manually insert some of the declarations they provide.
-
-DEFAULT_GRANULARITY        default: page size if MORECORE_CONTIGUOUS,
-                                system_info.dwAllocationGranularity in WIN32,
-                                GetLargePageMinimum() if ENABLE_LARGE_PAGES,
-                                otherwise 64K.
-      Also settable using mallopt(M_GRANULARITY, x)
-  The unit for allocating and deallocating memory from the system.  On
-  most systems with contiguous MORECORE, there is no reason to
-  make this more than a page. However, systems with MMAP tend to
-  either require or encourage larger granularities.  You can increase
-  this value to prevent system allocation functions to be called so
-  often, especially if they are slow.  The value must be at least one
-  page and must be a power of two.  Setting to 0 causes initialization
-  to either page size or win32 region size.  (Note: In previous
-  versions of malloc, the equivalent of this option was called
-  "TOP_PAD")
-
-DEFAULT_GRANULARITY_ALIGNED default: undefined (which means page size)
-  Whether to enforce alignment when allocating and deallocating memory
-  from the system i.e. the base address of all allocations will be
-  aligned to DEFAULT_GRANULARITY if it is set. Note that enabling this carries
-  some overhead as multiple calls must now be made when probing for a valid
-  aligned value, however it does greatly ease the checking for whether
-  a given memory pointer was allocated by this allocator rather than
-  some other.
-
-DEFAULT_TRIM_THRESHOLD    default: 2MB
-      Also settable using mallopt(M_TRIM_THRESHOLD, x)
-  The maximum amount of unused top-most memory to keep before
-  releasing via malloc_trim in free().  Automatic trimming is mainly
-  useful in long-lived programs using contiguous MORECORE.  Because
-  trimming via sbrk can be slow on some systems, and can sometimes be
-  wasteful (in cases where programs immediately afterward allocate
-  more large chunks) the value should be high enough so that your
-  overall system performance would improve by releasing this much
-  memory.  As a rough guide, you might set to a value close to the
-  average size of a process (program) running on your system.
-  Releasing this much memory would allow such a process to run in
-  memory.  Generally, it is worth tuning trim thresholds when a
-  program undergoes phases where several large chunks are allocated
-  and released in ways that can reuse each other's storage, perhaps
-  mixed with phases where there are no such chunks at all. The trim
-  value must be greater than page size to have any useful effect.  To
-  disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
-  some people use of mallocing a huge space and then freeing it at
-  program startup, in an attempt to reserve system memory, doesn't
-  have the intended effect under automatic trimming, since that memory
-  will immediately be returned to the system.
-
-DEFAULT_MMAP_THRESHOLD       default: 256K
-      Also settable using mallopt(M_MMAP_THRESHOLD, x)
-  The request size threshold for using MMAP to directly service a
-  request. Requests of at least this size that cannot be allocated
-  using already-existing space will be serviced via mmap.  (If enough
-  normal freed space already exists it is used instead.)  Using mmap
-  segregates relatively large chunks of memory so that they can be
-  individually obtained and released from the host system. A request
-  serviced through mmap is never reused by any other request (at least
-  not directly; the system may just so happen to remap successive
-  requests to the same locations).  Segregating space in this way has
-  the benefits that: Mmapped space can always be individually released
-  back to the system, which helps keep the system level memory demands
-  of a long-lived program low.  Also, mapped memory doesn't become
-  `locked' between other chunks, as can happen with normally allocated
-  chunks, which means that even trimming via malloc_trim would not
-  release them.  However, it has the disadvantage that the space
-  cannot be reclaimed, consolidated, and then used to service later
-  requests, as happens with normal chunks.  The advantages of mmap
-  nearly always outweigh disadvantages for "large" chunks, but the
-  value of "large" may vary across systems.  The default is an
-  empirically derived value that works well in most systems. You can
-  disable mmap by setting to MAX_SIZE_T.
-
-MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
-  The number of consolidated frees between checks to release
-  unused segments when freeing. When using non-contiguous segments,
-  especially with multiple mspaces, checking only for topmost space
-  doesn't always suffice to trigger trimming. To compensate for this,
-  free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
-  current number of segments, if greater) try to release unused
-  segments to the OS when freeing chunks that result in
-  consolidation. The best value for this parameter is a compromise
-  between slowing down frees with relatively costly checks that
-  rarely trigger versus holding on to unused memory. To effectively
-  disable, set to MAX_SIZE_T. This may lead to a very slight speed
-  improvement at the expense of carrying around more memory.
-*/
-
-/* Version identifier to allow people to support multiple versions */
-#ifndef DLMALLOC_VERSION
-#define DLMALLOC_VERSION 20804
-#endif /* DLMALLOC_VERSION */
-
-#ifndef WIN32
-#ifdef _WIN32
-#define WIN32 1
-#endif  /* _WIN32 */
-#ifdef _WIN32_WCE
-#define LACKS_FCNTL_H
-#define WIN32 1
-#endif /* _WIN32_WCE */
-#endif  /* WIN32 */
-#ifdef WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <tchar.h>
-#define HAVE_MMAP 1
-#define HAVE_MORECORE 0
-#define LACKS_UNISTD_H
-#define LACKS_SYS_PARAM_H
-#define LACKS_SYS_MMAN_H
-#define LACKS_STRING_H
-#define LACKS_STRINGS_H
-#define LACKS_SYS_TYPES_H
-#define LACKS_ERRNO_H
-#ifndef MALLOC_FAILURE_ACTION
-#define MALLOC_FAILURE_ACTION
-#endif /* MALLOC_FAILURE_ACTION */
-#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
-#define MMAP_CLEARS 0
-#else
-#define MMAP_CLEARS 1
-#endif /* _WIN32_WCE */
-#endif  /* WIN32 */
-
-#if defined(DARWIN) || defined(_DARWIN)
-/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
-#ifndef HAVE_MORECORE
-#define HAVE_MORECORE 0
-#define HAVE_MMAP 1
-/* OSX allocators provide 16 byte alignment */
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)16U)
-#endif
-#endif  /* HAVE_MORECORE */
-#endif  /* DARWIN */
-
-#ifndef LACKS_SYS_TYPES_H
-#include <sys/types.h>  /* For size_t */
-#endif  /* LACKS_SYS_TYPES_H */
-
-#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310)
-#define SPIN_LOCKS_AVAILABLE 1
-#else
-#define SPIN_LOCKS_AVAILABLE 0
-#endif
-
-/* The maximum possible size_t value has all bits set */
-#define MAX_SIZE_T           (~(size_t)0)
-
-#ifndef ONLY_MSPACES
-#define ONLY_MSPACES 0     /* define to a value */
-#else
-#define ONLY_MSPACES 1
-#endif  /* ONLY_MSPACES */
-#ifndef MSPACES
-#if ONLY_MSPACES
-#define MSPACES 1
-#else   /* ONLY_MSPACES */
-#define MSPACES 0
-#endif  /* ONLY_MSPACES */
-#endif  /* MSPACES */
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)8U)
-#endif  /* MALLOC_ALIGNMENT */
-#ifndef FOOTERS
-#define FOOTERS 0
-#endif  /* FOOTERS */
-#ifndef ABORT
-#define ABORT  abort()
-#endif  /* ABORT */
-#ifndef ABORT_ON_ASSERT_FAILURE
-#define ABORT_ON_ASSERT_FAILURE 1
-#endif  /* ABORT_ON_ASSERT_FAILURE */
-#ifndef PROCEED_ON_ERROR
-#define PROCEED_ON_ERROR 0
-#endif  /* PROCEED_ON_ERROR */
-#ifndef USE_LOCKS
-#define USE_LOCKS 0
-#endif  /* USE_LOCKS */
-#ifndef USE_SPIN_LOCKS
-#if USE_LOCKS && SPIN_LOCKS_AVAILABLE
-#define USE_SPIN_LOCKS 1
-#else
-#define USE_SPIN_LOCKS 0
-#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */
-#endif /* USE_SPIN_LOCKS */
-#ifndef INSECURE
-#define INSECURE 0
-#endif  /* INSECURE */
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif  /* HAVE_MMAP */
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 1
-#endif  /* MMAP_CLEARS */
-#ifndef HAVE_MREMAP
-#ifdef linux
-#define HAVE_MREMAP 1
-#else   /* linux */
-#define HAVE_MREMAP 0
-#endif  /* linux */
-#endif  /* HAVE_MREMAP */
-#ifndef MALLOC_FAILURE_ACTION
-#define MALLOC_FAILURE_ACTION  errno = ENOMEM;
-#endif  /* MALLOC_FAILURE_ACTION */
-#ifndef HAVE_MORECORE
-#if ONLY_MSPACES
-#define HAVE_MORECORE 0
-#else   /* ONLY_MSPACES */
-#define HAVE_MORECORE 1
-#endif  /* ONLY_MSPACES */
-#endif  /* HAVE_MORECORE */
-#if !HAVE_MORECORE
-#define MORECORE_CONTIGUOUS 0
-#else   /* !HAVE_MORECORE */
-#define MORECORE_DEFAULT sbrk
-#ifndef MORECORE_CONTIGUOUS
-#define MORECORE_CONTIGUOUS 1
-#endif  /* MORECORE_CONTIGUOUS */
-#endif  /* HAVE_MORECORE */
-#ifndef DEFAULT_GRANULARITY
-#if (MORECORE_CONTIGUOUS || defined(WIN32))
-#define DEFAULT_GRANULARITY (0)  /* 0 means to compute in init_mparams */
-#else   /* MORECORE_CONTIGUOUS */
-#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
-#endif  /* MORECORE_CONTIGUOUS */
-#endif  /* DEFAULT_GRANULARITY */
-#ifndef DEFAULT_TRIM_THRESHOLD
-#ifndef MORECORE_CANNOT_TRIM
-#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
-#else   /* MORECORE_CANNOT_TRIM */
-#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
-#endif  /* MORECORE_CANNOT_TRIM */
-#endif  /* DEFAULT_TRIM_THRESHOLD */
-#ifndef DEFAULT_MMAP_THRESHOLD
-#if HAVE_MMAP
-#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
-#else   /* HAVE_MMAP */
-#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
-#endif  /* HAVE_MMAP */
-#endif  /* DEFAULT_MMAP_THRESHOLD */
-#ifndef MAX_RELEASE_CHECK_RATE
-#if HAVE_MMAP
-#define MAX_RELEASE_CHECK_RATE 4095
-#else
-#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
-#endif /* HAVE_MMAP */
-#endif /* MAX_RELEASE_CHECK_RATE */
-#ifndef USE_BUILTIN_FFS
-#define USE_BUILTIN_FFS 0
-#endif  /* USE_BUILTIN_FFS */
-#ifndef USE_DEV_RANDOM
-#define USE_DEV_RANDOM 0
-#endif  /* USE_DEV_RANDOM */
-#ifndef NO_MALLINFO
-#define NO_MALLINFO 0
-#endif  /* NO_MALLINFO */
-#ifndef MALLINFO_FIELD_TYPE
-#define MALLINFO_FIELD_TYPE size_t
-#endif  /* MALLINFO_FIELD_TYPE */
-#ifndef NO_SEGMENT_TRAVERSAL
-#define NO_SEGMENT_TRAVERSAL 0
-#endif /* NO_SEGMENT_TRAVERSAL */
-
-/*
-  mallopt tuning options.  SVID/XPG defines four standard parameter
-  numbers for mallopt, normally defined in malloc.h.  None of these
-  are used in this malloc, so setting them has no effect. But this
-  malloc does support the following options.
-*/
-
-#define M_TRIM_THRESHOLD     (-1)
-#define M_GRANULARITY        (-2)
-#define M_MMAP_THRESHOLD     (-3)
-
-/* ------------------------ Mallinfo declarations ------------------------ */
-
-#if !NO_MALLINFO
-/*
-  This version of malloc supports the standard SVID/XPG mallinfo
-  routine that returns a struct containing usage properties and
-  statistics. It should work on any system that has a
-  /usr/include/malloc.h defining struct mallinfo.  The main
-  declaration needed is the mallinfo struct that is returned (by-copy)
-  by mallinfo().  The malloinfo struct contains a bunch of fields that
-  are not even meaningful in this version of malloc.  These fields are
-  are instead filled by mallinfo() with other numbers that might be of
-  interest.
-
-  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
-  /usr/include/malloc.h file that includes a declaration of struct
-  mallinfo.  If so, it is included; else a compliant version is
-  declared below.  These must be precisely the same for mallinfo() to
-  work.  The original SVID version of this struct, defined on most
-  systems with mallinfo, declares all fields as ints. But some others
-  define as unsigned long. If your system defines the fields using a
-  type of different width than listed here, you MUST #include your
-  system version and #define HAVE_USR_INCLUDE_MALLOC_H.
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-#ifdef HAVE_USR_INCLUDE_MALLOC_H
-#include "/usr/include/malloc.h"
-#else /* HAVE_USR_INCLUDE_MALLOC_H */
-#ifndef STRUCT_MALLINFO_DECLARED
-#define STRUCT_MALLINFO_DECLARED 1
-struct mallinfo {
-  MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
-  MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
-  MALLINFO_FIELD_TYPE smblks;   /* always 0 */
-  MALLINFO_FIELD_TYPE hblks;    /* always 0 */
-  MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
-  MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
-  MALLINFO_FIELD_TYPE fsmblks;  /* always 0 */
-  MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
-  MALLINFO_FIELD_TYPE fordblks; /* total free space */
-  MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
-};
-#endif /* STRUCT_MALLINFO_DECLARED */
-#endif /* HAVE_USR_INCLUDE_MALLOC_H */
-#endif /* NO_MALLINFO */
-
-/*
-  Try to persuade compilers to inline. The most critical functions for
-  inlining are defined as macros, so these aren't used for them.
-*/
-
-#ifndef FORCEINLINE
-  #if defined(__GNUC__)
-#define FORCEINLINE __inline __attribute__ ((always_inline))
-  #elif defined(_MSC_VER)
-    #define FORCEINLINE __forceinline
-  #endif
-#endif
-#ifndef NOINLINE
-  #if defined(__GNUC__)
-    #define NOINLINE __attribute__ ((noinline))
-  #elif defined(_MSC_VER)
-    #define NOINLINE __declspec(noinline)
-  #else
-    #define NOINLINE
-  #endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#ifndef FORCEINLINE
- #define FORCEINLINE inline
-#endif
-#endif /* __cplusplus */
-#ifndef FORCEINLINE
- #define FORCEINLINE
-#endif
-
-#if !ONLY_MSPACES
-
-/* ------------------- Declarations of public routines ------------------- */
-
-#ifndef USE_DL_PREFIX
-#define dlcalloc               calloc
-#define dlfree                 free
-#define dlmalloc               malloc
-#define dlmemalign             memalign
-#define dlrealloc              realloc
-#define dlvalloc               valloc
-#define dlpvalloc              pvalloc
-#define dlmallinfo             mallinfo
-#define dlmallopt              mallopt
-#define dlmalloc_trim          malloc_trim
-#define dlmalloc_stats         malloc_stats
-#define dlmalloc_usable_size   malloc_usable_size
-#define dlmalloc_footprint     malloc_footprint
-#define dlmalloc_max_footprint malloc_max_footprint
-#define dlindependent_calloc   independent_calloc
-#define dlindependent_comalloc independent_comalloc
-#endif /* USE_DL_PREFIX */
-
-
-/*
-  malloc(size_t n)
-  Returns a pointer to a newly allocated chunk of at least n bytes, or
-  null if no space is available, in which case errno is set to ENOMEM
-  on ANSI C systems.
-
-  If n is zero, malloc returns a minimum-sized chunk. (The minimum
-  size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
-  systems.)  Note that size_t is an unsigned type, so calls with
-  arguments that would be negative if signed are interpreted as
-  requests for huge amounts of space, which will often fail. The
-  maximum supported value of n differs across systems, but is in all
-  cases less than the maximum representable value of a size_t.
-*/
-void* dlmalloc(size_t);
-
-/*
-  free(void* p)
-  Releases the chunk of memory pointed to by p, that had been previously
-  allocated using malloc or a related routine such as realloc.
-  It has no effect if p is null. If p was not malloced or already
-  freed, free(p) will by default cause the current program to abort.
-*/
-void  dlfree(void*);
-
-/*
-  calloc(size_t n_elements, size_t element_size);
-  Returns a pointer to n_elements * element_size bytes, with all locations
-  set to zero.
-*/
-void* dlcalloc(size_t, size_t);
-
-/*
-  realloc(void* p, size_t n)
-  Returns a pointer to a chunk of size n that contains the same data
-  as does chunk p up to the minimum of (n, p's size) bytes, or null
-  if no space is available.
-
-  The returned pointer may or may not be the same as p. The algorithm
-  prefers extending p in most cases when possible, otherwise it
-  employs the equivalent of a malloc-copy-free sequence.
-
-  If p is null, realloc is equivalent to malloc.
-
-  If space is not available, realloc returns null, errno is set (if on
-  ANSI) and p is NOT freed.
-
-  if n is for fewer bytes than already held by p, the newly unused
-  space is lopped off and freed if possible.  realloc with a size
-  argument of zero (re)allocates a minimum-sized chunk.
-
-  The old unix realloc convention of allowing the last-free'd chunk
-  to be used as an argument to realloc is not supported.
-*/
-
-void* dlrealloc(void*, size_t);
-
-/*
-  memalign(size_t alignment, size_t n);
-  Returns a pointer to a newly allocated chunk of n bytes, aligned
-  in accord with the alignment argument.
-
-  The alignment argument should be a power of two. If the argument is
-  not a power of two, the nearest greater power is used.
-  8-byte alignment is guaranteed by normal malloc calls, so don't
-  bother calling memalign with an argument of 8 or less.
-
-  Overreliance on memalign is a sure way to fragment space.
-*/
-void* dlmemalign(size_t, size_t);
-
-/*
-  valloc(size_t n);
-  Equivalent to memalign(pagesize, n), where pagesize is the page
-  size of the system. If the pagesize is unknown, 4096 is used.
-*/
-void* dlvalloc(size_t);
-
-/*
-  mallopt(int parameter_number, int parameter_value)
-  Sets tunable parameters The format is to provide a
-  (parameter-number, parameter-value) pair.  mallopt then sets the
-  corresponding parameter to the argument value if it can (i.e., so
-  long as the value is meaningful), and returns 1 if successful else
-  0.  To workaround the fact that mallopt is specified to use int,
-  not size_t parameters, the value -1 is specially treated as the
-  maximum unsigned size_t value.
-
-  SVID/XPG/ANSI defines four standard param numbers for mallopt,
-  normally defined in malloc.h.  None of these are use in this malloc,
-  so setting them has no effect. But this malloc also supports other
-  options in mallopt. See below for details.  Briefly, supported
-  parameters are as follows (listed defaults are for "typical"
-  configurations).
-
-  Symbol            param #  default    allowed param values
-  M_TRIM_THRESHOLD     -1   2*1024*1024   any   (-1 disables)
-  M_GRANULARITY        -2     page size   any power of 2 >= page size
-  M_MMAP_THRESHOLD     -3      256*1024   any   (or 0 if no MMAP support)
-*/
-int dlmallopt(int, int);
-
-/*
-  malloc_footprint();
-  Returns the number of bytes obtained from the system.  The total
-  number of bytes allocated by malloc, realloc etc., is less than this
-  value. Unlike mallinfo, this function returns only a precomputed
-  result, so can be called frequently to monitor memory consumption.
-  Even if locks are otherwise defined, this function does not use them,
-  so results might not be up to date.
-*/
-size_t dlmalloc_footprint(void);
-
-/*
-  malloc_max_footprint();
-  Returns the maximum number of bytes obtained from the system. This
-  value will be greater than current footprint if deallocated space
-  has been reclaimed by the system. The peak number of bytes allocated
-  by malloc, realloc etc., is less than this value. Unlike mallinfo,
-  this function returns only a precomputed result, so can be called
-  frequently to monitor memory consumption.  Even if locks are
-  otherwise defined, this function does not use them, so results might
-  not be up to date.
-*/
-size_t dlmalloc_max_footprint(void);
-
-#if !NO_MALLINFO
-/*
-  mallinfo()
-  Returns (by copy) a struct containing various summary statistics:
-
-  arena:     current total non-mmapped bytes allocated from system
-  ordblks:   the number of free chunks
-  smblks:    always zero.
-  hblks:     current number of mmapped regions
-  hblkhd:    total bytes held in mmapped regions
-  usmblks:   the maximum total allocated space. This will be greater
-                than current total if trimming has occurred.
-  fsmblks:   always zero
-  uordblks:  current total allocated space (normal or mmapped)
-  fordblks:  total free space
-  keepcost:  the maximum number of bytes that could ideally be released
-               back to system via malloc_trim. ("ideally" means that
-               it ignores page restrictions etc.)
-
-  Because these fields are ints, but internal bookkeeping may
-  be kept as longs, the reported values may wrap around zero and
-  thus be inaccurate.
-*/
-struct mallinfo dlmallinfo(void);
-#endif /* NO_MALLINFO */
-
-/*
-  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
-
-  independent_calloc is similar to calloc, but instead of returning a
-  single cleared space, it returns an array of pointers to n_elements
-  independent elements that can hold contents of size elem_size, each
-  of which starts out cleared, and can be independently freed,
-  realloc'ed etc. The elements are guaranteed to be adjacently
-  allocated (this is not guaranteed to occur with multiple callocs or
-  mallocs), which may also improve cache locality in some
-  applications.
-
-  The "chunks" argument is optional (i.e., may be null, which is
-  probably the most typical usage). If it is null, the returned array
-  is itself dynamically allocated and should also be freed when it is
-  no longer needed. Otherwise, the chunks array must be of at least
-  n_elements in length. It is filled in with the pointers to the
-  chunks.
-
-  In either case, independent_calloc returns this pointer array, or
-  null if the allocation failed.  If n_elements is zero and "chunks"
-  is null, it returns a chunk representing an array with zero elements
-  (which should be freed if not wanted).
-
-  Each element must be individually freed when it is no longer
-  needed. If you'd like to instead be able to free all at once, you
-  should instead use regular calloc and assign pointers into this
-  space to represent elements.  (In this case though, you cannot
-  independently free elements.)
-
-  independent_calloc simplifies and speeds up implementations of many
-  kinds of pools.  It may also be useful when constructing large data
-  structures that initially have a fixed number of fixed-sized nodes,
-  but the number is not known at compile time, and some of the nodes
-  may later need to be freed. For example:
-
-  struct Node { int item; struct Node* next; };
-
-  struct Node* build_list() {
-    struct Node** pool;
-    int n = read_number_of_nodes_needed();
-    if (n <= 0) return 0;
-    pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0);
-    if (pool == 0) die();
-    // organize into a linked list...
-    struct Node* first = pool[0];
-    for (i = 0; i < n-1; ++i)
-      pool[i]->next = pool[i+1];
-    free(pool);     // Can now free the array (or not, if it is needed later)
-    return first;
-  }
-*/
-void** dlindependent_calloc(size_t, size_t, void**);
-
-/*
-  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
-
-  independent_comalloc allocates, all at once, a set of n_elements
-  chunks with sizes indicated in the "sizes" array.    It returns
-  an array of pointers to these elements, each of which can be
-  independently freed, realloc'ed etc. The elements are guaranteed to
-  be adjacently allocated (this is not guaranteed to occur with
-  multiple callocs or mallocs), which may also improve cache locality
-  in some applications.
-
-  The "chunks" argument is optional (i.e., may be null). If it is null
-  the returned array is itself dynamically allocated and should also
-  be freed when it is no longer needed. Otherwise, the chunks array
-  must be of at least n_elements in length. It is filled in with the
-  pointers to the chunks.
-
-  In either case, independent_comalloc returns this pointer array, or
-  null if the allocation failed.  If n_elements is zero and chunks is
-  null, it returns a chunk representing an array with zero elements
-  (which should be freed if not wanted).
-
-  Each element must be individually freed when it is no longer
-  needed. If you'd like to instead be able to free all at once, you
-  should instead use a single regular malloc, and assign pointers at
-  particular offsets in the aggregate space. (In this case though, you
-  cannot independently free elements.)
-
-  independent_comallac differs from independent_calloc in that each
-  element may have a different size, and also that it does not
-  automatically clear elements.
-
-  independent_comalloc can be used to speed up allocation in cases
-  where several structs or objects must always be allocated at the
-  same time.  For example:
-
-  struct Head { ... }
-  struct Foot { ... }
-
-  void send_message(char* msg) {
-    int msglen = strlen(msg);
-    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
-    void* chunks[3];
-    if (independent_comalloc(3, sizes, chunks) == 0)
-      die();
-    struct Head* head = (struct Head*)(chunks[0]);
-    char*        body = (char*)(chunks[1]);
-    struct Foot* foot = (struct Foot*)(chunks[2]);
-    // ...
-  }
-
-  In general though, independent_comalloc is worth using only for
-  larger values of n_elements. For small values, you probably won't
-  detect enough difference from series of malloc calls to bother.
-
-  Overuse of independent_comalloc can increase overall memory usage,
-  since it cannot reuse existing noncontiguous small chunks that
-  might be available for some of the elements.
-*/
-void** dlindependent_comalloc(size_t, size_t*, void**);
-
-
-/*
-  pvalloc(size_t n);
-  Equivalent to valloc(minimum-page-that-holds(n)), that is,
-  round up n to nearest pagesize.
- */
-void*  dlpvalloc(size_t);
-
-/*
-  malloc_trim(size_t pad);
-
-  If possible, gives memory back to the system (via negative arguments
-  to sbrk) if there is unused memory at the `high' end of the malloc
-  pool or in unused MMAP segments. You can call this after freeing
-  large blocks of memory to potentially reduce the system-level memory
-  requirements of a program. However, it cannot guarantee to reduce
-  memory. Under some allocation patterns, some large free blocks of
-  memory will be locked between two used chunks, so they cannot be
-  given back to the system.
-
-  The `pad' argument to malloc_trim represents the amount of free
-  trailing space to leave untrimmed. If this argument is zero, only
-  the minimum amount of memory to maintain internal data structures
-  will be left. Non-zero arguments can be supplied to maintain enough
-  trailing space to service future expected allocations without having
-  to re-obtain memory from the system.
-
-  Malloc_trim returns 1 if it actually released any memory, else 0.
-*/
-int  dlmalloc_trim(size_t);
-
-/*
-  malloc_stats();
-  Prints on stderr the amount of space obtained from the system (both
-  via sbrk and mmap), the maximum amount (which may be more than
-  current if malloc_trim and/or munmap got called), and the current
-  number of bytes allocated via malloc (or realloc, etc) but not yet
-  freed. Note that this is the number of bytes allocated, not the
-  number requested. It will be larger than the number requested
-  because of alignment and bookkeeping overhead. Because it includes
-  alignment wastage as being in use, this figure may be greater than
-  zero even when no user-level chunks are allocated.
-
-  The reported current and maximum system memory can be inaccurate if
-  a program makes other calls to system memory allocation functions
-  (normally sbrk) outside of malloc.
-
-  malloc_stats prints only the most commonly interesting statistics.
-  More information can be obtained by calling mallinfo.
-*/
-void  dlmalloc_stats(void);
-
-#endif /* ONLY_MSPACES */
-
-/*
-  malloc_usable_size(void* p);
-
-  Returns the number of bytes you can actually use in
-  an allocated chunk, which may be more than you requested (although
-  often not) due to alignment and minimum size constraints.
-  You can use this many bytes without worrying about
-  overwriting other allocated objects. This is not a particularly great
-  programming practice. malloc_usable_size can be more useful in
-  debugging and assertions, for example:
-
-  p = malloc(n);
-  assert(malloc_usable_size(p) >= 256);
-*/
-size_t dlmalloc_usable_size(void*);
-
-
-#if MSPACES
-
-/*
-  mspace is an opaque type representing an independent
-  region of space that supports mspace_malloc, etc.
-*/
-typedef void* mspace;
-
-/*
-  create_mspace creates and returns a new independent space with the
-  given initial capacity, or, if 0, the default granularity size.  It
-  returns null if there is no system memory available to create the
-  space.  If argument locked is non-zero, the space uses a separate
-  lock to control access. The capacity of the space will grow
-  dynamically as needed to service mspace_malloc requests.  You can
-  control the sizes of incremental increases of this space by
-  compiling with a different DEFAULT_GRANULARITY or dynamically
-  setting with mallopt(M_GRANULARITY, value).
-*/
-mspace create_mspace(size_t capacity, int locked);
-
-/*
-  destroy_mspace destroys the given space, and attempts to return all
-  of its memory back to the system, returning the total number of
-  bytes freed. After destruction, the results of access to all memory
-  used by the space become undefined.
-*/
-size_t destroy_mspace(mspace msp);
-
-/*
-  create_mspace_with_base uses the memory supplied as the initial base
-  of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
-  space is used for bookkeeping, so the capacity must be at least this
-  large. (Otherwise 0 is returned.) When this initial space is
-  exhausted, additional memory will be obtained from the system.
-  Destroying this space will deallocate all additionally allocated
-  space (if possible) but not the initial base.
-*/
-mspace create_mspace_with_base(void* base, size_t capacity, int locked);
-
-/*
-  mspace_track_large_chunks controls whether requests for large chunks
-  are allocated in their own untracked mmapped regions, separate from
-  others in this mspace. By default large chunks are not tracked,
-  which reduces fragmentation. However, such chunks are not
-  necessarily released to the system upon destroy_mspace.  Enabling
-  tracking by setting to true may increase fragmentation, but avoids
-  leakage when relying on destroy_mspace to release all memory
-  allocated using this space.  The function returns the previous
-  setting.
-*/
-int mspace_track_large_chunks(mspace msp, int enable);
-
-
-/*
-  mspace_malloc behaves as malloc, but operates within
-  the given space.
-*/
-void* mspace_malloc(mspace msp, size_t bytes);
-
-/*
-  mspace_free behaves as free, but operates within
-  the given space.
-
-  If compiled with FOOTERS==1, mspace_free is not actually needed.
-  free may be called instead of mspace_free because freed chunks from
-  any space are handled by their originating spaces.
-*/
-void mspace_free(mspace msp, void* mem);
-
-/*
-  mspace_realloc behaves as realloc, but operates within
-  the given space.
-
-  If compiled with FOOTERS==1, mspace_realloc is not actually
-  needed.  realloc may be called instead of mspace_realloc because
-  realloced chunks from any space are handled by their originating
-  spaces.
-*/
-void* mspace_realloc(mspace msp, void* mem, size_t newsize);
-
-/*
-  mspace_calloc behaves as calloc, but operates within
-  the given space.
-*/
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
-
-/*
-  mspace_memalign behaves as memalign, but operates within
-  the given space.
-*/
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
-
-/*
-  mspace_independent_calloc behaves as independent_calloc, but
-  operates within the given space.
-*/
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
-                                 size_t elem_size, void* chunks[]);
-
-/*
-  mspace_independent_comalloc behaves as independent_comalloc, but
-  operates within the given space.
-*/
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
-                                   size_t sizes[], void* chunks[]);
-
-/*
-  mspace_footprint() returns the number of bytes obtained from the
-  system for this space.
-*/
-size_t mspace_footprint(mspace msp);
-
-/*
-  mspace_max_footprint() returns the peak number of bytes obtained from the
-  system for this space.
-*/
-size_t mspace_max_footprint(mspace msp);
-
-
-#if !NO_MALLINFO
-/*
-  mspace_mallinfo behaves as mallinfo, but reports properties of
-  the given space.
-*/
-struct mallinfo mspace_mallinfo(mspace msp);
-#endif /* NO_MALLINFO */
-
-/*
-  malloc_usable_size(void* p) behaves the same as malloc_usable_size;
-*/
-  size_t mspace_usable_size(void* mem);
-
-/*
-  mspace_malloc_stats behaves as malloc_stats, but reports
-  properties of the given space.
-*/
-void mspace_malloc_stats(mspace msp);
-
-/*
-  mspace_trim behaves as malloc_trim, but
-  operates within the given space.
-*/
-int mspace_trim(mspace msp, size_t pad);
-
-/*
-  An alias for mallopt.
-*/
-int mspace_mallopt(int, int);
-
-#endif /* MSPACES */
-
-#ifdef __cplusplus
-}  /* end of extern "C" */
-#endif /* __cplusplus */
-
-/*
-  ========================================================================
-  To make a fully customizable malloc.h header file, cut everything
-  above this line, put into file malloc.h, edit to suit, and #include it
-  on the next line, as well as in programs that use this malloc.
-  ========================================================================
-*/
-
-/* #include "malloc.h" */
-
-/*------------------------------ internal #includes ---------------------- */
-
-#ifdef WIN32
-#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
-#endif /* WIN32 */
-
-#include <stdio.h>       /* for printing in malloc_stats */
-
-#ifndef LACKS_ERRNO_H
-#include <errno.h>       /* for MALLOC_FAILURE_ACTION */
-#endif /* LACKS_ERRNO_H */
-#if FOOTERS || DEBUG
-#include <time.h>        /* for magic initialization */
-#endif /* FOOTERS */
-#ifndef LACKS_STDLIB_H
-#include <stdlib.h>      /* for abort() */
-#endif /* LACKS_STDLIB_H */
-#ifdef DEBUG
-#if ABORT_ON_ASSERT_FAILURE
-#undef assert
-#define assert(x) if(!(x)) ABORT
-#else /* ABORT_ON_ASSERT_FAILURE */
-#include <assert.h>
-#endif /* ABORT_ON_ASSERT_FAILURE */
-#else  /* DEBUG */
-#ifndef assert
-#define assert(x)
-#endif
-#define DEBUG 0
-#endif /* DEBUG */
-#ifndef LACKS_STRING_H
-#include <string.h>      /* for memset etc */
-#endif  /* LACKS_STRING_H */
-#if USE_BUILTIN_FFS
-#ifndef LACKS_STRINGS_H
-#include <strings.h>     /* for ffs */
-#endif /* LACKS_STRINGS_H */
-#endif /* USE_BUILTIN_FFS */
-#if HAVE_MMAP
-#ifndef LACKS_SYS_MMAN_H
-/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
-#if (defined(linux) && !defined(__USE_GNU))
-#define __USE_GNU 1
-#include <sys/mman.h>    /* for mmap */
-#undef __USE_GNU
-#else
-#include <sys/mman.h>    /* for mmap */
-#endif /* linux */
-#endif /* LACKS_SYS_MMAN_H */
-#ifndef LACKS_FCNTL_H
-#include <fcntl.h>
-#endif /* LACKS_FCNTL_H */
-#endif /* HAVE_MMAP */
-#ifndef LACKS_UNISTD_H
-#include <unistd.h>     /* for sbrk, sysconf */
-#else /* LACKS_UNISTD_H */
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
-extern void*     sbrk(ptrdiff_t);
-#endif /* FreeBSD etc */
-#endif /* LACKS_UNISTD_H */
-
-/* Declarations for locking */
-#if USE_LOCKS
-#ifndef WIN32
-#include <pthread.h>
-#if defined (__SVR4) && defined (__sun)  /* solaris */
-#include <thread.h>
-#endif /* solaris */
-#else
-#ifndef _M_AMD64
-/* These are already defined on AMD64 builds */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp);
-LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* _M_AMD64 */
-#pragma intrinsic (_InterlockedCompareExchange)
-#pragma intrinsic (_InterlockedExchange)
-#define interlockedcompareexchange _InterlockedCompareExchange
-#define interlockedexchange _InterlockedExchange
-#endif /* Win32 */
-#endif /* USE_LOCKS */
-
-/* Declarations for bit scanning on win32 */
-#if defined(_MSC_VER) && _MSC_VER>=1300
-#ifndef BitScanForward	/* Try to avoid pulling in WinNT.h */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
-unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#define BitScanForward _BitScanForward
-#define BitScanReverse _BitScanReverse
-#pragma intrinsic(_BitScanForward)
-#pragma intrinsic(_BitScanReverse)
-#endif /* BitScanForward */
-#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
-
-#ifndef WIN32
-#ifndef malloc_getpagesize
-#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
-#    ifndef _SC_PAGE_SIZE
-#      define _SC_PAGE_SIZE _SC_PAGESIZE
-#    endif
-#  endif
-#  ifdef _SC_PAGE_SIZE
-#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-#  else
-#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
-       extern size_t getpagesize();
-#      define malloc_getpagesize getpagesize()
-#    else
-#      ifdef WIN32 /* use supplied emulation of getpagesize */
-#        define malloc_getpagesize getpagesize()
-#      else
-#        ifndef LACKS_SYS_PARAM_H
-#          include <sys/param.h>
-#        endif
-#        ifdef EXEC_PAGESIZE
-#          define malloc_getpagesize EXEC_PAGESIZE
-#        else
-#          ifdef NBPG
-#            ifndef CLSIZE
-#              define malloc_getpagesize NBPG
-#            else
-#              define malloc_getpagesize (NBPG * CLSIZE)
-#            endif
-#          else
-#            ifdef NBPC
-#              define malloc_getpagesize NBPC
-#            else
-#              ifdef PAGESIZE
-#                define malloc_getpagesize PAGESIZE
-#              else /* just guess */
-#                define malloc_getpagesize ((size_t)4096U)
-#              endif
-#            endif
-#          endif
-#        endif
-#      endif
-#    endif
-#  endif
-#endif
-#endif
-
-
-
-/* ------------------- size_t and alignment properties -------------------- */
-
-/* The byte and bit size of a size_t */
-#define SIZE_T_SIZE         (sizeof(size_t))
-#define SIZE_T_BITSIZE      (sizeof(size_t) << 3)
-
-/* Some constants coerced to size_t */
-/* Annoying but necessary to avoid errors on some platforms */
-#define SIZE_T_ZERO         ((size_t)0)
-#define SIZE_T_ONE          ((size_t)1)
-#define SIZE_T_TWO          ((size_t)2)
-#define SIZE_T_FOUR         ((size_t)4)
-#define TWO_SIZE_T_SIZES    (SIZE_T_SIZE<<1)
-#define FOUR_SIZE_T_SIZES   (SIZE_T_SIZE<<2)
-#define SIX_SIZE_T_SIZES    (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
-#define HALF_MAX_SIZE_T     (MAX_SIZE_T / 2U)
-
-/* The bit mask value corresponding to MALLOC_ALIGNMENT */
-#define CHUNK_ALIGN_MASK    (MALLOC_ALIGNMENT - SIZE_T_ONE)
-
-/* True if address a has acceptable alignment */
-#define is_aligned(A)       (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
-
-/* the number of bytes to offset an address to align it */
-#define align_offset(A)\
- ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
-  ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
-
-/*
-  malloc_params holds global properties, including those that can be
-  dynamically set using mallopt. There is a single instance, mparams,
-  initialized in init_mparams. Note that the non-zeroness of "magic"
-  also serves as an initialization flag.
-*/
-typedef unsigned int flag_t;
-struct malloc_params {
-  volatile size_t magic;
-  size_t page_size;
-  size_t granularity;
-  size_t mmap_threshold;
-  size_t trim_threshold;
-  flag_t default_mflags;
-};
-
-static struct malloc_params mparams;
-
-/* Ensure mparams initialized */
-#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
-
-/* -------------------------- MMAP preliminaries ------------------------- */
-
-/*
-   If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
-   checks to fail so compiler optimizer can delete code rather than
-   using so many "#if"s.
-*/
-
-
-/* MORECORE and MMAP must return MFAIL on failure */
-#define MFAIL                ((void*)(MAX_SIZE_T))
-#define CMFAIL               ((char*)(MFAIL)) /* defined for convenience */
-
-#if HAVE_MMAP
-
-#ifndef WIN32
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS        MAP_ANON
-#endif /* MAP_ANON */
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-#define MMAP_IMPL mmap_aligned
-static void* lastAlignedmmap; /* Used as a hint */
-static void* mmap_aligned(void *start, size_t length, int prot, int flags, int fd, off_t offset) {
-  void* baseaddress = 0;
-  void* ptr = 0;
-  if(!start) {
-    baseaddress = lastAlignedmmap;
-    for(;;) {
-      if(baseaddress) flags|=MAP_FIXED;
-      ptr = mmap(baseaddress, length, prot, flags, fd, offset);
-      if(!ptr)
-        baseaddress = (void*)((size_t)baseaddress + mparams.granularity);
-      else if((size_t)ptr & (mparams.granularity - SIZE_T_ONE)) {
-        munmap(ptr, length);
-        baseaddress = (void*)(((size_t)ptr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE));
-      }
-      else break;
-    }
-  }
-  else ptr = mmap(start, length, prot, flags, fd, offset);
-  if(ptr) lastAlignedmmap = (void*)((size_t) ptr + mparams.granularity);
-  return ptr;
-}
-#else
-#define MMAP_IMPL mmap
-#endif /* DEFAULT_GRANULARITY_ALIGNED */
-#define MUNMAP_DEFAULT(a, s)  munmap((a), (s))
-#define MMAP_PROT            (PROT_READ|PROT_WRITE)
-#ifdef MAP_ANONYMOUS
-#define MMAP_FLAGS           (MAP_PRIVATE|MAP_ANONYMOUS)
-#define MMAP_DEFAULT(s)       MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
-#else /* MAP_ANONYMOUS */
-/*
-   Nearly all versions of mmap support MAP_ANONYMOUS, so the following
-   is unlikely to be needed, but is supplied just in case.
-*/
-#define MMAP_FLAGS           (MAP_PRIVATE)
-static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
-#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
-           (dev_zero_fd = open("/dev/zero", O_RDWR), \
-            MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
-            MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
-#endif /* MAP_ANONYMOUS */
-
-#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
-
-#else /* WIN32 */
-
-/* Win32 MMAP via VirtualAlloc */
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-static void* lastWin32mmap; /* Used as a hint */
-#endif /* DEFAULT_GRANULARITY_ALIGNED */
-#ifdef ENABLE_LARGE_PAGES
-static int largepagesavailable = 1;
-#endif /* ENABLE_LARGE_PAGES */
-static FORCEINLINE void* win32mmap(size_t size) {
-  void* baseaddress = 0;
-  void* ptr = 0;
-#ifdef ENABLE_LARGE_PAGES
-  /* Note that large pages are *always* allocated on a large page boundary.
-  If however granularity is small then don't waste a kernel call if size
-  isn't around the size of a large page */
-  if(largepagesavailable && size >= 1*1024*1024) {
-    ptr = VirtualAlloc(baseaddress, size, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE);
-    if(!ptr && ERROR_PRIVILEGE_NOT_HELD==GetLastError()) largepagesavailable=0;
-  }
-#endif
-  if(!ptr) {
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-    /* We try to avoid overhead by speculatively reserving at aligned
-    addresses until we succeed */
-    baseaddress = lastWin32mmap;
-    for(;;) {
-      void* reserveaddr = VirtualAlloc(baseaddress, size, MEM_RESERVE, PAGE_READWRITE);
-      if(!reserveaddr)
-        baseaddress = (void*)((size_t)baseaddress + mparams.granularity);
-      else if((size_t)reserveaddr & (mparams.granularity - SIZE_T_ONE)) {
-        VirtualFree(reserveaddr, 0, MEM_RELEASE);
-        baseaddress = (void*)(((size_t)reserveaddr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE));
-      }
-      else break;
-    }
-#endif
-    if(!ptr) ptr = VirtualAlloc(baseaddress, size, baseaddress ? MEM_COMMIT : MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
-#if DEBUG
-    if(lastWin32mmap && ptr!=lastWin32mmap) printf("Non-contiguous VirtualAlloc between %p and %p\n", ptr, lastWin32mmap);
-#endif
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-    if(ptr) lastWin32mmap = (void*)((size_t) ptr + mparams.granularity);
-#endif
-  }
-#if DEBUG
-#ifdef ENABLE_LARGE_PAGES
-  printf("VirtualAlloc returns %p size %u. LargePagesAvailable=%d\n", ptr, size, largepagesavailable);
-#else
-  printf("VirtualAlloc returns %p size %u\n", ptr, size);
-#endif
-#endif
-  return (ptr != 0)? ptr: MFAIL;
-}
-
-/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static FORCEINLINE void* win32direct_mmap(size_t size) {
-  void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-                           PAGE_READWRITE);
-  return (ptr != 0)? ptr: MFAIL;
-}
-
-/* This function supports releasing coalesed segments */
-static FORCEINLINE int win32munmap(void* ptr, size_t size) {
-  MEMORY_BASIC_INFORMATION minfo;
-  char* cptr = (char*)ptr;
-  while (size) {
-    if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
-      return -1;
-    if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
-        minfo.State != MEM_COMMIT || minfo.RegionSize > size)
-      return -1;
-    if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
-      return -1;
-    cptr += minfo.RegionSize;
-    size -= minfo.RegionSize;
-  }
-  return 0;
-}
-
-#define MMAP_DEFAULT(s)             win32mmap(s)
-#define MUNMAP_DEFAULT(a, s)        win32munmap((a), (s))
-#define DIRECT_MMAP_DEFAULT(s)      win32direct_mmap(s)
-#endif /* WIN32 */
-#endif /* HAVE_MMAP */
-
-#if HAVE_MREMAP
-#ifndef WIN32
-#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
-#endif /* WIN32 */
-#endif /* HAVE_MREMAP */
-
-
-/**
- * Define CALL_MORECORE
- */
-#if HAVE_MORECORE
-    #ifdef MORECORE
-        #define CALL_MORECORE(S)    MORECORE(S)
-    #else  /* MORECORE */
-        #define CALL_MORECORE(S)    MORECORE_DEFAULT(S)
-    #endif /* MORECORE */
-#else  /* HAVE_MORECORE */
-    #define CALL_MORECORE(S)        MFAIL
-#endif /* HAVE_MORECORE */
-
-/**
- * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
- */
-#if HAVE_MMAP
-    #define USE_MMAP_BIT            (SIZE_T_ONE)
-
-    #ifdef MMAP
-        #define CALL_MMAP(s)        MMAP(s)
-    #else /* MMAP */
-        #define CALL_MMAP(s)        MMAP_DEFAULT(s)
-    #endif /* MMAP */
-    #ifdef MUNMAP
-        #define CALL_MUNMAP(a, s)   MUNMAP((a), (s))
-    #else /* MUNMAP */
-        #define CALL_MUNMAP(a, s)   MUNMAP_DEFAULT((a), (s))
-    #endif /* MUNMAP */
-    #ifdef DIRECT_MMAP
-        #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
-    #else /* DIRECT_MMAP */
-        #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
-    #endif /* DIRECT_MMAP */
-#else  /* HAVE_MMAP */
-    #define USE_MMAP_BIT            (SIZE_T_ZERO)
-
-    #define MMAP(s)                 MFAIL
-    #define MUNMAP(a, s)            (-1)
-    #define DIRECT_MMAP(s)          MFAIL
-    #define CALL_DIRECT_MMAP(s)     DIRECT_MMAP(s)
-    #define CALL_MMAP(s)            MMAP(s)
-    #define CALL_MUNMAP(a, s)       MUNMAP((a), (s))
-#endif /* HAVE_MMAP */
-
-/**
- * Define CALL_MREMAP
- */
-#if HAVE_MMAP && HAVE_MREMAP
-    #ifdef MREMAP
-        #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
-    #else /* MREMAP */
-        #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
-    #endif /* MREMAP */
-#else  /* HAVE_MMAP && HAVE_MREMAP */
-    #define CALL_MREMAP(addr, osz, nsz, mv)     MFAIL
-#endif /* HAVE_MMAP && HAVE_MREMAP */
-
-/* mstate bit set if continguous morecore disabled or failed */
-#define USE_NONCONTIGUOUS_BIT (4U)
-
-/* segment bit set in create_mspace_with_base */
-#define EXTERN_BIT            (8U)
-
-
-/* --------------------------- Lock preliminaries ------------------------ */
-
-/*
-  When locks are defined, there is one global lock, plus
-  one per-mspace lock.
-
-  The global lock_ensures that mparams.magic and other unique
-  mparams values are initialized only once. It also protects
-  sequences of calls to MORECORE.  In many cases sys_alloc requires
-  two calls, that should not be interleaved with calls by other
-  threads.  This does not protect against direct calls to MORECORE
-  by other threads not using this lock, so there is still code to
-  cope the best we can on interference.
-
-  Per-mspace locks surround calls to malloc, free, etc.  To enable use
-  in layered extensions, per-mspace locks are reentrant.
-
-  Because lock-protected regions generally have bounded times, it is
-  OK to use the supplied simple spinlocks in the custom versions for
-  x86. Spinlocks are likely to improve performance for lightly
-  contended applications, but worsen performance under heavy
-  contention.
-
-  If USE_LOCKS is > 1, the definitions of lock routines here are
-  bypassed, in which case you will need to define the type MLOCK_T,
-  and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly
-  TRY_LOCK (which is not used in this malloc, but commonly needed in
-  extensions.)  You must also declare a
-    static MLOCK_T malloc_global_mutex = { initialization values };.
-
-*/
-
-#if USE_LOCKS == 1
-
-#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE
-#ifndef WIN32
-
-/* Custom pthread-style spin locks on x86 and x64 for gcc */
-struct pthread_mlock_t {
-  volatile unsigned int l;
-  char cachelinepadding[64];
-  unsigned int c;
-  pthread_t threadid;
-};
-#define MLOCK_T               struct pthread_mlock_t
-#define CURRENT_THREAD        pthread_self()
-#define INITIAL_LOCK(sl)      ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
-#define ACQUIRE_LOCK(sl)      pthread_acquire_lock(sl)
-#define RELEASE_LOCK(sl)      pthread_release_lock(sl)
-#define TRY_LOCK(sl)          pthread_try_lock(sl)
-#define SPINS_PER_YIELD       63
-
-static MLOCK_T malloc_global_mutex = { 0, "", 0, 0};
-
-static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) {
-  int spins = 0;
-  volatile unsigned int* lp = &sl->l;
-  for (;;) {
-    if (*lp != 0) {
-      if (sl->threadid == CURRENT_THREAD) {
-        ++sl->c;
-        return 0;
-      }
-    }
-    else {
-      /* place args to cmpxchgl in locals to evade oddities in some gccs */
-      int cmp = 0;
-      int val = 1;
-      int ret;
-      __asm__ __volatile__  ("lock; cmpxchgl %1, %2"
-                             : "=a" (ret)
-                             : "r" (val), "m" (*(lp)), "0"(cmp)
-                             : "memory", "cc");
-      if (!ret) {
-        assert(!sl->threadid);
-        sl->threadid = CURRENT_THREAD;
-        sl->c = 1;
-        return 0;
-      }
-    }
-    if ((++spins & SPINS_PER_YIELD) == 0) {
-#if defined (__SVR4) && defined (__sun) /* solaris */
-      thr_yield();
-#else
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
-      sched_yield();
-#else  /* no-op yield on unknown systems */
-      ;
-#endif /* __linux__ || __FreeBSD__ || __APPLE__ */
-#endif /* solaris */
-    }
-  }
-}
-
-static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) {
-  volatile unsigned int* lp = &sl->l;
-  assert(*lp != 0);
-  assert(sl->threadid == CURRENT_THREAD);
-  if (--sl->c == 0) {
-    sl->threadid = 0;
-    int prev = 0;
-    int ret;
-    __asm__ __volatile__ ("lock; xchgl %0, %1"
-                          : "=r" (ret)
-                          : "m" (*(lp)), "0"(prev)
-                          : "memory");
-  }
-}
-
-static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) {
-  volatile unsigned int* lp = &sl->l;
-  if (*lp != 0) {
-    if (sl->threadid == CURRENT_THREAD) {
-      ++sl->c;
-      return 1;
-    }
-  }
-  else {
-    int cmp = 0;
-    int val = 1;
-    int ret;
-    __asm__ __volatile__  ("lock; cmpxchgl %1, %2"
-                           : "=a" (ret)
-                           : "r" (val), "m" (*(lp)), "0"(cmp)
-                           : "memory", "cc");
-    if (!ret) {
-      assert(!sl->threadid);
-      sl->threadid = CURRENT_THREAD;
-      sl->c = 1;
-      return 1;
-    }
-  }
-  return 0;
-}
-
-
-#else /* WIN32 */
-/* Custom win32-style spin locks on x86 and x64 for MSC */
-struct win32_mlock_t {
-  volatile long l;
-  char cachelinepadding[64];
-  unsigned int c;
-  long threadid;
-};
-
-#define MLOCK_T               struct win32_mlock_t
-#define CURRENT_THREAD        ((long)GetCurrentThreadId())
-#define INITIAL_LOCK(sl)      ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
-#define ACQUIRE_LOCK(sl)      win32_acquire_lock(sl)
-#define RELEASE_LOCK(sl)      win32_release_lock(sl)
-#define TRY_LOCK(sl)          win32_try_lock(sl)
-#define SPINS_PER_YIELD       63
-
-static MLOCK_T malloc_global_mutex = { 0, 0, 0};
-
-static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) {
-  int spins = 0;
-  for (;;) {
-    if (sl->l != 0) {
-      if (sl->threadid == CURRENT_THREAD) {
-        ++sl->c;
-        return 0;
-      }
-    }
-    else {
-      if (!interlockedexchange(&sl->l, 1)) {
-        assert(!sl->threadid);
-        sl->threadid = CURRENT_THREAD;
-        sl->c = 1;
-        return 0;
-      }
-    }
-    if ((++spins & SPINS_PER_YIELD) == 0)
-      SleepEx(0, FALSE);
-  }
-}
-
-static FORCEINLINE void win32_release_lock (MLOCK_T *sl) {
-  assert(sl->threadid == CURRENT_THREAD);
-  assert(sl->l != 0);
-  if (--sl->c == 0) {
-    sl->threadid = 0;
-    interlockedexchange (&sl->l, 0);
-  }
-}
-
-static FORCEINLINE int win32_try_lock (MLOCK_T *sl) {
-  if (sl->l != 0) {
-    if (sl->threadid == CURRENT_THREAD) {
-      ++sl->c;
-      return 1;
-    }
-  }
-  else {
-    if (!interlockedexchange(&sl->l, 1)){
-      assert(!sl->threadid);
-      sl->threadid = CURRENT_THREAD;
-      sl->c = 1;
-      return 1;
-    }
-  }
-  return 0;
-}
-
-#endif /* WIN32 */
-#else /* USE_SPIN_LOCKS */
-
-#ifndef WIN32
-/* pthreads-based locks */
-
-#define MLOCK_T               pthread_mutex_t
-#define CURRENT_THREAD        pthread_self()
-#define INITIAL_LOCK(sl)      pthread_init_lock(sl)
-#define ACQUIRE_LOCK(sl)      pthread_mutex_lock(sl)
-#define RELEASE_LOCK(sl)      pthread_mutex_unlock(sl)
-#define TRY_LOCK(sl)          (!pthread_mutex_trylock(sl))
-
-static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* Cope with old-style linux recursive lock initialization by adding */
-/* skipped internal declaration from pthread.h */
-#ifdef linux
-#ifndef PTHREAD_MUTEX_RECURSIVE
-extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
-					   int __kind));
-#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
-#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
-#endif
-#endif
-
-static int pthread_init_lock (MLOCK_T *sl) {
-  pthread_mutexattr_t attr;
-  if (pthread_mutexattr_init(&attr)) return 1;
-  if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1;
-  if (pthread_mutex_init(sl, &attr)) return 1;
-  if (pthread_mutexattr_destroy(&attr)) return 1;
-  return 0;
-}
-
-#else /* WIN32 */
-/* Win32 critical sections */
-#define MLOCK_T               CRITICAL_SECTION
-#define CURRENT_THREAD        GetCurrentThreadId()
-#define INITIAL_LOCK(s)       (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000))
-#define ACQUIRE_LOCK(s)       (EnterCriticalSection(sl), 0)
-#define RELEASE_LOCK(s)       LeaveCriticalSection(sl)
-#define TRY_LOCK(s)           TryEnterCriticalSection(sl)
-#define NEED_GLOBAL_LOCK_INIT
-
-static MLOCK_T malloc_global_mutex;
-static volatile long malloc_global_mutex_status;
-
-/* Use spin loop to initialize global lock */
-static void init_malloc_global_mutex() {
-  for (;;) {
-    long stat = malloc_global_mutex_status;
-    if (stat > 0)
-      return;
-    /* transition to < 0 while initializing, then to > 0) */
-    if (stat == 0 &&
-        interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) {
-      InitializeCriticalSection(&malloc_global_mutex);
-      interlockedexchange(&malloc_global_mutex_status,1);
-      return;
-    }
-    SleepEx(0, FALSE);
-  }
-}
-
-#endif /* WIN32 */
-#endif /* USE_SPIN_LOCKS */
-#endif /* USE_LOCKS == 1 */
-
-/* -----------------------  User-defined locks ------------------------ */
-
-#if USE_LOCKS > 1
-/* Define your own lock implementation here */
-/* #define INITIAL_LOCK(sl)  ... */
-/* #define ACQUIRE_LOCK(sl)  ... */
-/* #define RELEASE_LOCK(sl)  ... */
-/* #define TRY_LOCK(sl) ... */
-/* static MLOCK_T malloc_global_mutex = ... */
-#endif /* USE_LOCKS > 1 */
-
-/* -----------------------  Lock-based state ------------------------ */
-
-#if USE_LOCKS
-#define USE_LOCK_BIT               (2U)
-#else  /* USE_LOCKS */
-#define USE_LOCK_BIT               (0U)
-#define INITIAL_LOCK(l)
-#endif /* USE_LOCKS */
-
-#if USE_LOCKS
-#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
-#define ACQUIRE_MALLOC_GLOBAL_LOCK()  ACQUIRE_LOCK(&malloc_global_mutex);
-#endif
-#ifndef RELEASE_MALLOC_GLOBAL_LOCK
-#define RELEASE_MALLOC_GLOBAL_LOCK()  RELEASE_LOCK(&malloc_global_mutex);
-#endif
-#else  /* USE_LOCKS */
-#define ACQUIRE_MALLOC_GLOBAL_LOCK()
-#define RELEASE_MALLOC_GLOBAL_LOCK()
-#endif /* USE_LOCKS */
-
-
-/* -----------------------  Chunk representations ------------------------ */
-
-/*
-  (The following includes lightly edited explanations by Colin Plumb.)
-
-  The malloc_chunk declaration below is misleading (but accurate and
-  necessary).  It declares a "view" into memory allowing access to
-  necessary fields at known offsets from a given base.
-
-  Chunks of memory are maintained using a `boundary tag' method as
-  originally described by Knuth.  (See the paper by Paul Wilson
-  ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
-  techniques.)  Sizes of free chunks are stored both in the front of
-  each chunk and at the end.  This makes consolidating fragmented
-  chunks into bigger chunks fast.  The head fields also hold bits
-  representing whether chunks are free or in use.
-
-  Here are some pictures to make it clearer.  They are "exploded" to
-  show that the state of a chunk can be thought of as extending from
-  the high 31 bits of the head field of its header through the
-  prev_foot and PINUSE_BIT bit of the following chunk header.
-
-  A chunk that's in use looks like:
-
-   chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-           | Size of previous chunk (if P = 0)                             |
-           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
-         | Size of this chunk                                         1| +-+
-   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-         |                                                               |
-         +-                                                             -+
-         |                                                               |
-         +-                                                             -+
-         |                                                               :
-         +-      size - sizeof(size_t) available payload bytes          -+
-         :                                                               |
- chunk-> +-                                                             -+
-         |                                                               |
-         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
-       | Size of next chunk (may or may not be in use)               | +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-    And if it's free, it looks like this:
-
-   chunk-> +-                                                             -+
-           | User payload (must be in use, or we would have merged!)       |
-           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
-         | Size of this chunk                                         0| +-+
-   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-         | Next pointer                                                  |
-         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-         | Prev pointer                                                  |
-         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-         |                                                               :
-         +-      size - sizeof(struct chunk) unused bytes               -+
-         :                                                               |
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-         | Size of this chunk                                            |
-         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
-       | Size of next chunk (must be in use, or we would have merged)| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-       |                                                               :
-       +- User payload                                                -+
-       :                                                               |
-       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-                                                                     |0|
-                                                                     +-+
-  Note that since we always merge adjacent free chunks, the chunks
-  adjacent to a free chunk must be in use.
-
-  Given a pointer to a chunk (which can be derived trivially from the
-  payload pointer) we can, in O(1) time, find out whether the adjacent
-  chunks are free, and if so, unlink them from the lists that they
-  are on and merge them with the current chunk.
-
-  Chunks always begin on even word boundaries, so the mem portion
-  (which is returned to the user) is also on an even word boundary, and
-  thus at least double-word aligned.
-
-  The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
-  chunk size (which is always a multiple of two words), is an in-use
-  bit for the *previous* chunk.  If that bit is *clear*, then the
-  word before the current chunk size contains the previous chunk
-  size, and can be used to find the front of the previous chunk.
-  The very first chunk allocated always has this bit set, preventing
-  access to non-existent (or non-owned) memory. If pinuse is set for
-  any given chunk, then you CANNOT determine the size of the
-  previous chunk, and might even get a memory addressing fault when
-  trying to do so.
-
-  The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
-  the chunk size redundantly records whether the current chunk is
-  inuse (unless the chunk is mmapped). This redundancy enables usage
-  checks within free and realloc, and reduces indirection when freeing
-  and consolidating chunks.
-
-  Each freshly allocated chunk must have both cinuse and pinuse set.
-  That is, each allocated chunk borders either a previously allocated
-  and still in-use chunk, or the base of its memory arena. This is
-  ensured by making all allocations from the the `lowest' part of any
-  found chunk.  Further, no free chunk physically borders another one,
-  so each free chunk is known to be preceded and followed by either
-  inuse chunks or the ends of memory.
-
-  Note that the `foot' of the current chunk is actually represented
-  as the prev_foot of the NEXT chunk. This makes it easier to
-  deal with alignments etc but can be very confusing when trying
-  to extend or adapt this code.
-
-  The exceptions to all this are
-
-     1. The special chunk `top' is the top-most available chunk (i.e.,
-        the one bordering the end of available memory). It is treated
-        specially.  Top is never included in any bin, is used only if
-        no other chunk is available, and is released back to the
-        system if it is very large (see M_TRIM_THRESHOLD).  In effect,
-        the top chunk is treated as larger (and thus less well
-        fitting) than any other available chunk.  The top chunk
-        doesn't update its trailing size field since there is no next
-        contiguous chunk that would have to index off it. However,
-        space is still allocated for it (TOP_FOOT_SIZE) to enable
-        separation or merging when space is extended.
-
-     3. Chunks allocated via mmap, have both cinuse and pinuse bits
-        cleared in their head fields.  Because they are allocated
-        one-by-one, each must carry its own prev_foot field, which is
-        also used to hold the offset this chunk has within its mmapped
-        region, which is needed to preserve alignment. Each mmapped
-        chunk is trailed by the first two fields of a fake next-chunk
-        for sake of usage checks.
-
-*/
-
-struct malloc_chunk {
-  size_t               prev_foot;  /* Size of previous chunk (if free).  */
-  size_t               head;       /* Size and inuse bits. */
-  struct malloc_chunk* fd;         /* double links -- used only if free. */
-  struct malloc_chunk* bk;
-};
-
-typedef struct malloc_chunk  mchunk;
-typedef struct malloc_chunk* mchunkptr;
-typedef struct malloc_chunk* sbinptr;  /* The type of bins of chunks */
-typedef unsigned int bindex_t;         /* Described below */
-typedef unsigned int binmap_t;         /* Described below */
-
-/* ------------------- Chunks sizes and alignments ----------------------- */
-
-#define MCHUNK_SIZE         (sizeof(mchunk))
-
-#if FOOTERS
-#define CHUNK_OVERHEAD      (TWO_SIZE_T_SIZES)
-#else /* FOOTERS */
-#define CHUNK_OVERHEAD      (SIZE_T_SIZE)
-#endif /* FOOTERS */
-
-/* MMapped chunks need a second word of overhead ... */
-#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-/* ... and additional padding for fake next-chunk at foot */
-#define MMAP_FOOT_PAD       (FOUR_SIZE_T_SIZES)
-
-/* The smallest size we can malloc is an aligned minimal chunk */
-#define MIN_CHUNK_SIZE\
-  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* conversion from malloc headers to user pointers, and back */
-#define chunk2mem(p)        ((void*)((char*)(p)       + TWO_SIZE_T_SIZES))
-#define mem2chunk(mem)      ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
-/* chunk associated with aligned address A */
-#define align_as_chunk(A)   (mchunkptr)((A) + align_offset(chunk2mem(A)))
-
-/* Bounds on request (not chunk) sizes. */
-#define MAX_REQUEST         ((-MIN_CHUNK_SIZE) << 2)
-#define MIN_REQUEST         (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
-
-/* pad request bytes into a usable size */
-#define pad_request(req) \
-   (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* pad request, checking for minimum (but not maximum) */
-#define request2size(req) \
-  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
-
-
-/* ------------------ Operations on head and foot fields ----------------- */
-
-/*
-  The head field of a chunk is or'ed with PINUSE_BIT when previous
-  adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
-  use, unless mmapped, in which case both bits are cleared.
-
-  FLAG4_BIT is not used by this malloc, but might be useful in extensions.
-*/
-
-#define PINUSE_BIT          (SIZE_T_ONE)
-#define CINUSE_BIT          (SIZE_T_TWO)
-#define FLAG4_BIT           (SIZE_T_FOUR)
-#define INUSE_BITS          (PINUSE_BIT|CINUSE_BIT)
-#define FLAG_BITS           (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)
-
-/* Head value for fenceposts */
-#define FENCEPOST_HEAD      (INUSE_BITS|SIZE_T_SIZE)
-
-/* extraction of fields from head words */
-#define cinuse(p)           ((p)->head & CINUSE_BIT)
-#define pinuse(p)           ((p)->head & PINUSE_BIT)
-#define is_inuse(p)         (((p)->head & INUSE_BITS) != PINUSE_BIT)
-#define is_mmapped(p)       (((p)->head & INUSE_BITS) == 0)
-
-#define chunksize(p)        ((p)->head & ~(FLAG_BITS))
-
-#define clear_pinuse(p)     ((p)->head &= ~PINUSE_BIT)
-
-/* Treat space at ptr +/- offset as a chunk */
-#define chunk_plus_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
-#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
-
-/* Ptr to next or previous physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
-#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
-
-/* extract next chunk's pinuse bit */
-#define next_pinuse(p)  ((next_chunk(p)->head) & PINUSE_BIT)
-
-/* Get/set size at footer */
-#define get_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot)
-#define set_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
-
-/* Set size, pinuse bit, and foot */
-#define set_size_and_pinuse_of_free_chunk(p, s)\
-  ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
-
-/* Set size, pinuse bit, foot, and clear next pinuse */
-#define set_free_with_pinuse(p, s, n)\
-  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
-
-/* Get the internal overhead associated with chunk p */
-#define overhead_for(p)\
- (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
-
-/* Return true if malloced space is not necessarily cleared */
-#if MMAP_CLEARS
-#define calloc_must_clear(p) (!is_mmapped(p))
-#else /* MMAP_CLEARS */
-#define calloc_must_clear(p) (1)
-#endif /* MMAP_CLEARS */
-
-/* ---------------------- Overlaid data structures ----------------------- */
-
-/*
-  When chunks are not in use, they are treated as nodes of either
-  lists or trees.
-
-  "Small"  chunks are stored in circular doubly-linked lists, and look
-  like this:
-
-    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Size of previous chunk                            |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    `head:' |             Size of chunk, in bytes                         |P|
-      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Forward pointer to next chunk in list             |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Back pointer to previous chunk in list            |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Unused space (may be 0 bytes long)                .
-            .                                                               .
-            .                                                               |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    `foot:' |             Size of chunk, in bytes                           |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-  Larger chunks are kept in a form of bitwise digital trees (aka
-  tries) keyed on chunksizes.  Because malloc_tree_chunks are only for
-  free chunks greater than 256 bytes, their size doesn't impose any
-  constraints on user chunk sizes.  Each node looks like:
-
-    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Size of previous chunk                            |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    `head:' |             Size of chunk, in bytes                         |P|
-      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Forward pointer to next chunk of same size        |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Back pointer to previous chunk of same size       |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Pointer to left child (child[0])                  |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Pointer to right child (child[1])                 |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Pointer to parent                                 |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             bin index of this chunk                           |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-            |             Unused space                                      .
-            .                                                               |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-    `foot:' |             Size of chunk, in bytes                           |
-            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-  Each tree holding treenodes is a tree of unique chunk sizes.  Chunks
-  of the same size are arranged in a circularly-linked list, with only
-  the oldest chunk (the next to be used, in our FIFO ordering)
-  actually in the tree.  (Tree members are distinguished by a non-null
-  parent pointer.)  If a chunk with the same size an an existing node
-  is inserted, it is linked off the existing node using pointers that
-  work in the same way as fd/bk pointers of small chunks.
-
-  Each tree contains a power of 2 sized range of chunk sizes (the
-  smallest is 0x100 <= x < 0x180), which is is divided in half at each
-  tree level, with the chunks in the smaller half of the range (0x100
-  <= x < 0x140 for the top nose) in the left subtree and the larger
-  half (0x140 <= x < 0x180) in the right subtree.  This is, of course,
-  done by inspecting individual bits.
-
-  Using these rules, each node's left subtree contains all smaller
-  sizes than its right subtree.  However, the node at the root of each
-  subtree has no particular ordering relationship to either.  (The
-  dividing line between the subtree sizes is based on trie relation.)
-  If we remove the last chunk of a given size from the interior of the
-  tree, we need to replace it with a leaf node.  The tree ordering
-  rules permit a node to be replaced by any leaf below it.
-
-  The smallest chunk in a tree (a common operation in a best-fit
-  allocator) can be found by walking a path to the leftmost leaf in
-  the tree.  Unlike a usual binary tree, where we follow left child
-  pointers until we reach a null, here we follow the right child
-  pointer any time the left one is null, until we reach a leaf with
-  both child pointers null. The smallest chunk in the tree will be
-  somewhere along that path.
-
-  The worst case number of steps to add, find, or remove a node is
-  bounded by the number of bits differentiating chunks within
-  bins. Under current bin calculations, this ranges from 6 up to 21
-  (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
-  is of course much better.
-*/
-
-struct malloc_tree_chunk {
-  /* The first four fields must be compatible with malloc_chunk */
-  size_t                    prev_foot;
-  size_t                    head;
-  struct malloc_tree_chunk* fd;
-  struct malloc_tree_chunk* bk;
-
-  struct malloc_tree_chunk* child[2];
-  struct malloc_tree_chunk* parent;
-  bindex_t                  index;
-};
-
-typedef struct malloc_tree_chunk  tchunk;
-typedef struct malloc_tree_chunk* tchunkptr;
-typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
-
-/* A little helper macro for trees */
-#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
-
-/* ----------------------------- Segments -------------------------------- */
-
-/*
-  Each malloc space may include non-contiguous segments, held in a
-  list headed by an embedded malloc_segment record representing the
-  top-most space. Segments also include flags holding properties of
-  the space. Large chunks that are directly allocated by mmap are not
-  included in this list. They are instead independently created and
-  destroyed without otherwise keeping track of them.
-
-  Segment management mainly comes into play for spaces allocated by
-  MMAP.  Any call to MMAP might or might not return memory that is
-  adjacent to an existing segment.  MORECORE normally contiguously
-  extends the current space, so this space is almost always adjacent,
-  which is simpler and faster to deal with. (This is why MORECORE is
-  used preferentially to MMAP when both are available -- see
-  sys_alloc.)  When allocating using MMAP, we don't use any of the
-  hinting mechanisms (inconsistently) supported in various
-  implementations of unix mmap, or distinguish reserving from
-  committing memory. Instead, we just ask for space, and exploit
-  contiguity when we get it.  It is probably possible to do
-  better than this on some systems, but no general scheme seems
-  to be significantly better.
-
-  Management entails a simpler variant of the consolidation scheme
-  used for chunks to reduce fragmentation -- new adjacent memory is
-  normally prepended or appended to an existing segment. However,
-  there are limitations compared to chunk consolidation that mostly
-  reflect the fact that segment processing is relatively infrequent
-  (occurring only when getting memory from system) and that we
-  don't expect to have huge numbers of segments:
-
-  * Segments are not indexed, so traversal requires linear scans.  (It
-    would be possible to index these, but is not worth the extra
-    overhead and complexity for most programs on most platforms.)
-  * New segments are only appended to old ones when holding top-most
-    memory; if they cannot be prepended to others, they are held in
-    different segments.
-
-  Except for the top-most segment of an mstate, each segment record
-  is kept at the tail of its segment. Segments are added by pushing
-  segment records onto the list headed by &mstate.seg for the
-  containing mstate.
-
-  Segment flags control allocation/merge/deallocation policies:
-  * If EXTERN_BIT set, then we did not allocate this segment,
-    and so should not try to deallocate or merge with others.
-    (This currently holds only for the initial segment passed
-    into create_mspace_with_base.)
-  * If USE_MMAP_BIT set, the segment may be merged with
-    other surrounding mmapped segments and trimmed/de-allocated
-    using munmap.
-  * If neither bit is set, then the segment was obtained using
-    MORECORE so can be merged with surrounding MORECORE'd segments
-    and deallocated/trimmed using MORECORE with negative arguments.
-*/
-
-struct malloc_segment {
-  char*        base;             /* base address */
-  size_t       size;             /* allocated size */
-  struct malloc_segment* next;   /* ptr to next segment */
-  flag_t       sflags;           /* mmap and extern flag */
-};
-
-#define is_mmapped_segment(S)  ((S)->sflags & USE_MMAP_BIT)
-#define is_extern_segment(S)   ((S)->sflags & EXTERN_BIT)
-
-typedef struct malloc_segment  msegment;
-typedef struct malloc_segment* msegmentptr;
-
-/* ---------------------------- malloc_state ----------------------------- */
-
-/*
-   A malloc_state holds all of the bookkeeping for a space.
-   The main fields are:
-
-  Top
-    The topmost chunk of the currently active segment. Its size is
-    cached in topsize.  The actual size of topmost space is
-    topsize+TOP_FOOT_SIZE, which includes space reserved for adding
-    fenceposts and segment records if necessary when getting more
-    space from the system.  The size at which to autotrim top is
-    cached from mparams in trim_check, except that it is disabled if
-    an autotrim fails.
-
-  Designated victim (dv)
-    This is the preferred chunk for servicing small requests that
-    don't have exact fits.  It is normally the chunk split off most
-    recently to service another small request.  Its size is cached in
-    dvsize. The link fields of this chunk are not maintained since it
-    is not kept in a bin.
-
-  SmallBins
-    An array of bin headers for free chunks.  These bins hold chunks
-    with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
-    chunks of all the same size, spaced 8 bytes apart.  To simplify
-    use in double-linked lists, each bin header acts as a malloc_chunk
-    pointing to the real first node, if it exists (else pointing to
-    itself).  This avoids special-casing for headers.  But to avoid
-    waste, we allocate only the fd/bk pointers of bins, and then use
-    repositioning tricks to treat these as the fields of a chunk.
-
-  TreeBins
-    Treebins are pointers to the roots of trees holding a range of
-    sizes. There are 2 equally spaced treebins for each power of two
-    from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
-    larger.
-
-  Bin maps
-    There is one bit map for small bins ("smallmap") and one for
-    treebins ("treemap).  Each bin sets its bit when non-empty, and
-    clears the bit when empty.  Bit operations are then used to avoid
-    bin-by-bin searching -- nearly all "search" is done without ever
-    looking at bins that won't be selected.  The bit maps
-    conservatively use 32 bits per map word, even if on 64bit system.
-    For a good description of some of the bit-based techniques used
-    here, see Henry S. Warren Jr's book "Hacker's Delight" (and
-    supplement at http://hackersdelight.org/). Many of these are
-    intended to reduce the branchiness of paths through malloc etc, as
-    well as to reduce the number of memory locations read or written.
-
-  Segments
-    A list of segments headed by an embedded malloc_segment record
-    representing the initial space.
-
-  Address check support
-    The least_addr field is the least address ever obtained from
-    MORECORE or MMAP. Attempted frees and reallocs of any address less
-    than this are trapped (unless INSECURE is defined).
-
-  Magic tag
-    A cross-check field that should always hold same value as mparams.magic.
-
-  Flags
-    Bits recording whether to use MMAP, locks, or contiguous MORECORE
-
-  Statistics
-    Each space keeps track of current and maximum system memory
-    obtained via MORECORE or MMAP.
-
-  Trim support
-    Fields holding the amount of unused topmost memory that should trigger
-    timming, and a counter to force periodic scanning to release unused
-    non-topmost segments.
-
-  Locking
-    If USE_LOCKS is defined, the "mutex" lock is acquired and released
-    around every public call using this mspace.
-
-  Extension support
-    A void* pointer and a size_t field that can be used to help implement
-    extensions to this malloc.
-*/
-
-/* Bin types, widths and sizes */
-#define NSMALLBINS        (32U)
-#define NTREEBINS         (32U)
-#define SMALLBIN_SHIFT    (3U)
-#define SMALLBIN_WIDTH    (SIZE_T_ONE << SMALLBIN_SHIFT)
-#define TREEBIN_SHIFT     (8U)
-#define MIN_LARGE_SIZE    (SIZE_T_ONE << TREEBIN_SHIFT)
-#define MAX_SMALL_SIZE    (MIN_LARGE_SIZE - SIZE_T_ONE)
-#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
-
-struct malloc_state {
-  binmap_t   smallmap;
-  binmap_t   treemap;
-  size_t     dvsize;
-  size_t     topsize;
-  char*      least_addr;
-  mchunkptr  dv;
-  mchunkptr  top;
-  size_t     trim_check;
-  size_t     release_checks;
-  size_t     magic;
-  mchunkptr  smallbins[(NSMALLBINS+1)*2];
-  tbinptr    treebins[NTREEBINS];
-  size_t     footprint;
-  size_t     max_footprint;
-  flag_t     mflags;
-  msegment   seg;
-#if USE_LOCKS
-  MLOCK_T    mutex;     /* locate lock among fields that rarely change */
-#endif /* USE_LOCKS */
-  void*      extp;      /* Unused but available for extensions */
-  size_t     exts;
-};
-
-typedef struct malloc_state*    mstate;
-
-/* ------------- Global malloc_state and malloc_params ------------------- */
-
-#if !ONLY_MSPACES
-
-/* The global malloc_state used for all non-"mspace" calls */
-static struct malloc_state _gm_;
-#define gm                 (&_gm_)
-#define is_global(M)       ((M) == &_gm_)
-
-#endif /* !ONLY_MSPACES */
-
-#define is_initialized(M)  ((M)->top != 0)
-
-/* -------------------------- system alloc setup ------------------------- */
-
-/* Operations on mflags */
-
-#define use_lock(M)           ((M)->mflags &   USE_LOCK_BIT)
-#define enable_lock(M)        ((M)->mflags |=  USE_LOCK_BIT)
-#define disable_lock(M)       ((M)->mflags &= ~USE_LOCK_BIT)
-
-#define use_mmap(M)           ((M)->mflags &   USE_MMAP_BIT)
-#define enable_mmap(M)        ((M)->mflags |=  USE_MMAP_BIT)
-#define disable_mmap(M)       ((M)->mflags &= ~USE_MMAP_BIT)
-
-#define use_noncontiguous(M)  ((M)->mflags &   USE_NONCONTIGUOUS_BIT)
-#define disable_contiguous(M) ((M)->mflags |=  USE_NONCONTIGUOUS_BIT)
-
-#define set_lock(M,L)\
- ((M)->mflags = (L)?\
-  ((M)->mflags | USE_LOCK_BIT) :\
-  ((M)->mflags & ~USE_LOCK_BIT))
-
-/* page-align a size */
-#define page_align(S)\
- (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
-
-/* granularity-align a size */
-#define granularity_align(S)\
-  (((S) + (mparams.granularity - SIZE_T_ONE))\
-   & ~(mparams.granularity - SIZE_T_ONE))
-
-
-/* For mmap, use granularity alignment on windows, else page-align */
-#ifdef WIN32
-#define mmap_align(S) granularity_align(S)
-#else
-#define mmap_align(S) page_align(S)
-#endif
-
-/* For sys_alloc, enough padding to ensure can malloc request on success */
-#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
-
-#define is_page_aligned(S)\
-   (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
-#define is_granularity_aligned(S)\
-   (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
-
-/*  True if segment S holds address A */
-#define segment_holds(S, A)\
-  ((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
-
-/* Return segment holding given address */
-static msegmentptr segment_holding(mstate m, char* addr) {
-  msegmentptr sp = &m->seg;
-  for (;;) {
-    if (addr >= sp->base && addr < sp->base + sp->size)
-      return sp;
-    if ((sp = sp->next) == 0)
-      return 0;
-  }
-}
-
-/* Return true if segment contains a segment link */
-static int has_segment_link(mstate m, msegmentptr ss) {
-  msegmentptr sp = &m->seg;
-  for (;;) {
-    if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size)
-      return 1;
-    if ((sp = sp->next) == 0)
-      return 0;
-  }
-}
-
-#ifndef MORECORE_CANNOT_TRIM
-#define should_trim(M,s)  ((s) > (M)->trim_check)
-#else  /* MORECORE_CANNOT_TRIM */
-#define should_trim(M,s)  (0)
-#endif /* MORECORE_CANNOT_TRIM */
-
-/*
-  TOP_FOOT_SIZE is padding at the end of a segment, including space
-  that may be needed to place segment records and fenceposts when new
-  noncontiguous segments are added.
-*/
-#define TOP_FOOT_SIZE\
-  (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
-
-
-/* -------------------------------  Hooks -------------------------------- */
-
-/*
-  PREACTION should be defined to return 0 on success, and nonzero on
-  failure. If you are not using locking, you can redefine these to do
-  anything you like.
-*/
-
-#if USE_LOCKS
-
-#define PREACTION(M)  ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
-#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
-#else /* USE_LOCKS */
-
-#ifndef PREACTION
-#define PREACTION(M) (0)
-#endif  /* PREACTION */
-
-#ifndef POSTACTION
-#define POSTACTION(M)
-#endif  /* POSTACTION */
-
-#endif /* USE_LOCKS */
-
-/*
-  CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
-  USAGE_ERROR_ACTION is triggered on detected bad frees and
-  reallocs. The argument p is an address that might have triggered the
-  fault. It is ignored by the two predefined actions, but might be
-  useful in custom actions that try to help diagnose errors.
-*/
-
-#if PROCEED_ON_ERROR
-
-/* A count of the number of corruption errors causing resets */
-int malloc_corruption_error_count;
-
-/* default corruption action */
-static void reset_on_error(mstate m);
-
-#define CORRUPTION_ERROR_ACTION(m)  reset_on_error(m)
-#define USAGE_ERROR_ACTION(m, p)
-
-#else /* PROCEED_ON_ERROR */
-
-#ifndef CORRUPTION_ERROR_ACTION
-#define CORRUPTION_ERROR_ACTION(m) ABORT
-#endif /* CORRUPTION_ERROR_ACTION */
-
-#ifndef USAGE_ERROR_ACTION
-#define USAGE_ERROR_ACTION(m,p) ABORT
-#endif /* USAGE_ERROR_ACTION */
-
-#endif /* PROCEED_ON_ERROR */
-
-/* -------------------------- Debugging setup ---------------------------- */
-
-#if ! DEBUG
-
-#define check_free_chunk(M,P)
-#define check_inuse_chunk(M,P)
-#define check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P)
-#define check_malloc_state(M)
-#define check_top_chunk(M,P)
-
-#else /* DEBUG */
-#define check_free_chunk(M,P)       do_check_free_chunk(M,P)
-#define check_inuse_chunk(M,P)      do_check_inuse_chunk(M,P)
-#define check_top_chunk(M,P)        do_check_top_chunk(M,P)
-#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P)    do_check_mmapped_chunk(M,P)
-#define check_malloc_state(M)       do_check_malloc_state(M)
-
-static void   do_check_any_chunk(mstate m, mchunkptr p);
-static void   do_check_top_chunk(mstate m, mchunkptr p);
-static void   do_check_mmapped_chunk(mstate m, mchunkptr p);
-static void   do_check_inuse_chunk(mstate m, mchunkptr p);
-static void   do_check_free_chunk(mstate m, mchunkptr p);
-static void   do_check_malloced_chunk(mstate m, void* mem, size_t s);
-static void   do_check_tree(mstate m, tchunkptr t);
-static void   do_check_treebin(mstate m, bindex_t i);
-static void   do_check_smallbin(mstate m, bindex_t i);
-static void   do_check_malloc_state(mstate m);
-static int    bin_find(mstate m, mchunkptr x);
-static size_t traverse_and_check(mstate m);
-#endif /* DEBUG */
-
-/* ---------------------------- Indexing Bins ---------------------------- */
-
-#define is_small(s)         (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
-#define small_index(s)      (bindex_t)((s)  >> SMALLBIN_SHIFT)
-#define small_index2size(i) ((i)  << SMALLBIN_SHIFT)
-#define MIN_SMALL_INDEX     (small_index(MIN_CHUNK_SIZE))
-
-/* addressing by index. See above about smallbin repositioning */
-#define smallbin_at(M, i)   ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
-#define treebin_at(M,i)     (&((M)->treebins[i]))
-
-/* assign tree index for size S to variable I. Use x86 asm if possible  */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define compute_tree_index(S, I)\
-{\
-  unsigned int X = S >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int K;\
-    __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g"  (X));\
-    I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
-  }\
-}
-
-#elif defined (__INTEL_COMPILER)
-#define compute_tree_index(S, I)\
-{\
-  size_t X = S >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int K = _bit_scan_reverse (X); \
-    I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
-  }\
-}
-
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-#define compute_tree_index(S, I)\
-{\
-  size_t X = S >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int K;\
-    _BitScanReverse((DWORD *) &K, (DWORD) X);\
-    I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
-  }\
-}
-
-#else /* GNUC */
-#define compute_tree_index(S, I)\
-{\
-  size_t X = S >> TREEBIN_SHIFT;\
-  if (X == 0)\
-    I = 0;\
-  else if (X > 0xFFFF)\
-    I = NTREEBINS-1;\
-  else {\
-    unsigned int Y = (unsigned int)X;\
-    unsigned int N = ((Y - 0x100) >> 16) & 8;\
-    unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
-    N += K;\
-    N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
-    K = 14 - N + ((Y <<= K) >> 15);\
-    I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\
-  }\
-}
-#endif /* GNUC */
-
-/* Bit representing maximum resolved size in a treebin at i */
-#define bit_for_tree_index(i) \
-   (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
-
-/* Shift placing maximum resolved bit in a treebin at i as sign bit */
-#define leftshift_for_tree_index(i) \
-   ((i == NTREEBINS-1)? 0 : \
-    ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
-
-/* The size of the smallest chunk held in bin with index i */
-#define minsize_for_tree_index(i) \
-   ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
-   (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
-
-
-/* ------------------------ Operations on bin maps ----------------------- */
-
-/* bit corresponding to given index */
-#define idx2bit(i)              ((binmap_t)(1) << (i))
-
-/* Mark/Clear bits with given index */
-#define mark_smallmap(M,i)      ((M)->smallmap |=  idx2bit(i))
-#define clear_smallmap(M,i)     ((M)->smallmap &= ~idx2bit(i))
-#define smallmap_is_marked(M,i) ((M)->smallmap &   idx2bit(i))
-
-#define mark_treemap(M,i)       ((M)->treemap  |=  idx2bit(i))
-#define clear_treemap(M,i)      ((M)->treemap  &= ~idx2bit(i))
-#define treemap_is_marked(M,i)  ((M)->treemap  &   idx2bit(i))
-
-/* isolate the least set bit of a bitmap */
-#define least_bit(x)         ((x) & -(x))
-
-/* mask with all bits to left of least bit of x on */
-#define left_bits(x)         ((x<<1) | -(x<<1))
-
-/* mask with all bits to left of or equal to least bit of x on */
-#define same_or_left_bits(x) ((x) | -(x))
-
-/* index corresponding to given bit. Use x86 asm if possible */
-
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define compute_bit2idx(X, I)\
-{\
-  unsigned int J;\
-  __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\
-  I = (bindex_t)J;\
-}
-
-#elif defined (__INTEL_COMPILER)
-#define compute_bit2idx(X, I)\
-{\
-  unsigned int J;\
-  J = _bit_scan_forward (X); \
-  I = (bindex_t)J;\
-}
-
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-#define compute_bit2idx(X, I)\
-{\
-  unsigned int J;\
-  _BitScanForward((DWORD *) &J, X);\
-  I = (bindex_t)J;\
-}
-
-#elif USE_BUILTIN_FFS
-#define compute_bit2idx(X, I) I = ffs(X)-1
-
-#else
-#define compute_bit2idx(X, I)\
-{\
-  unsigned int Y = X - 1;\
-  unsigned int K = Y >> (16-4) & 16;\
-  unsigned int N = K;        Y >>= K;\
-  N += K = Y >> (8-3) &  8;  Y >>= K;\
-  N += K = Y >> (4-2) &  4;  Y >>= K;\
-  N += K = Y >> (2-1) &  2;  Y >>= K;\
-  N += K = Y >> (1-0) &  1;  Y >>= K;\
-  I = (bindex_t)(N + Y);\
-}
-#endif /* GNUC */
-
-
-/* ----------------------- Runtime Check Support ------------------------- */
-
-/*
-  For security, the main invariant is that malloc/free/etc never
-  writes to a static address other than malloc_state, unless static
-  malloc_state itself has been corrupted, which cannot occur via
-  malloc (because of these checks). In essence this means that we
-  believe all pointers, sizes, maps etc held in malloc_state, but
-  check all of those linked or offsetted from other embedded data
-  structures.  These checks are interspersed with main code in a way
-  that tends to minimize their run-time cost.
-
-  When FOOTERS is defined, in addition to range checking, we also
-  verify footer fields of inuse chunks, which can be used guarantee
-  that the mstate controlling malloc/free is intact.  This is a
-  streamlined version of the approach described by William Robertson
-  et al in "Run-time Detection of Heap-based Overflows" LISA'03
-  http://www.usenix.org/events/lisa03/tech/robertson.html The footer
-  of an inuse chunk holds the xor of its mstate and a random seed,
-  that is checked upon calls to free() and realloc().  This is
-  (probablistically) unguessable from outside the program, but can be
-  computed by any code successfully malloc'ing any chunk, so does not
-  itself provide protection against code that has already broken
-  security through some other means.  Unlike Robertson et al, we
-  always dynamically check addresses of all offset chunks (previous,
-  next, etc). This turns out to be cheaper than relying on hashes.
-*/
-
-#if !INSECURE
-/* Check if address a is at least as high as any from MORECORE or MMAP */
-#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
-/* Check if address of next chunk n is higher than base chunk p */
-#define ok_next(p, n)    ((char*)(p) < (char*)(n))
-/* Check if p has inuse status */
-#define ok_inuse(p)     is_inuse(p)
-/* Check if p has its pinuse bit on */
-#define ok_pinuse(p)     pinuse(p)
-
-#else /* !INSECURE */
-#define ok_address(M, a) (1)
-#define ok_next(b, n)    (1)
-#define ok_inuse(p)      (1)
-#define ok_pinuse(p)     (1)
-#endif /* !INSECURE */
-
-#if (FOOTERS && !INSECURE)
-/* Check if (alleged) mstate m has expected magic field */
-#define ok_magic(M)      ((M)->magic == mparams.magic)
-#else  /* (FOOTERS && !INSECURE) */
-#define ok_magic(M)      (1)
-#endif /* (FOOTERS && !INSECURE) */
-
-
-/* In gcc, use __builtin_expect to minimize impact of checks */
-#if !INSECURE
-#if defined(__GNUC__) && __GNUC__ >= 3
-#define RTCHECK(e)  __builtin_expect(e, 1)
-#else /* GNUC */
-#define RTCHECK(e)  (e)
-#endif /* GNUC */
-#else /* !INSECURE */
-#define RTCHECK(e)  (1)
-#endif /* !INSECURE */
-
-/* macros to set up inuse chunks with or without footers */
-
-#if !FOOTERS
-
-#define mark_inuse_foot(M,p,s)
-
-/* Macros for setting head/foot of non-mmapped chunks */
-
-/* Set cinuse bit and pinuse bit of next chunk */
-#define set_inuse(M,p,s)\
-  ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
-  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
-#define set_inuse_and_pinuse(M,p,s)\
-  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
-  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set size, cinuse and pinuse bit of this chunk */
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
-  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
-
-#else /* FOOTERS */
-
-/* Set foot of inuse chunk to be xor of mstate and seed */
-#define mark_inuse_foot(M,p,s)\
-  (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
-
-#define get_mstate_for(p)\
-  ((mstate)(((mchunkptr)((char*)(p) +\
-    (chunksize(p))))->prev_foot ^ mparams.magic))
-
-#define set_inuse(M,p,s)\
-  ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
-  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
-  mark_inuse_foot(M,p,s))
-
-#define set_inuse_and_pinuse(M,p,s)\
-  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
-  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
- mark_inuse_foot(M,p,s))
-
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
-  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
-  mark_inuse_foot(M, p, s))
-
-#endif /* !FOOTERS */
-
-/* ---------------------------- setting mparams -------------------------- */
-
-#ifdef ENABLE_LARGE_PAGES
-typedef size_t (WINAPI *GetLargePageMinimum_t)(void);
-#endif
-
-/* Initialize mparams */
-static int init_mparams(void) {
-#ifdef NEED_GLOBAL_LOCK_INIT
-  if (malloc_global_mutex_status <= 0)
-    init_malloc_global_mutex();
-#endif
-
-  ACQUIRE_MALLOC_GLOBAL_LOCK();
-  if (mparams.magic == 0) {
-    size_t magic;
-    size_t psize;
-    size_t gsize;
-
-#ifndef WIN32
-    psize = malloc_getpagesize;
-    gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize);
-#else /* WIN32 */
-    {
-      SYSTEM_INFO system_info;
-      GetSystemInfo(&system_info);
-      psize = system_info.dwPageSize;
-      gsize = ((DEFAULT_GRANULARITY != 0)?
-               DEFAULT_GRANULARITY : system_info.dwAllocationGranularity);
-#ifdef ENABLE_LARGE_PAGES
-      { 
-          GetLargePageMinimum_t GetLargePageMinimum_ = (GetLargePageMinimum_t) GetProcAddress(GetModuleHandle(__T("kernel32.dll")), "GetLargePageMinimum");
-          if(GetLargePageMinimum_) {
-              size_t largepagesize = GetLargePageMinimum_();
-              if(largepagesize) {
-                  psize = largepagesize;
-                  gsize = ((DEFAULT_GRANULARITY != 0)?
-                           DEFAULT_GRANULARITY : largepagesize);
-                  if(gsize < largepagesize) gsize = largepagesize;
-              }
-          }
-      }
-#endif
-    }
-#endif /* WIN32 */
-
-    /* Sanity-check configuration:
-       size_t must be unsigned and as wide as pointer type.
-       ints must be at least 4 bytes.
-       alignment must be at least 8.
-       Alignment, min chunk size, and page size must all be powers of 2.
-    */
-    if ((sizeof(size_t) != sizeof(char*)) ||
-        (MAX_SIZE_T < MIN_CHUNK_SIZE)  ||
-        (sizeof(int) < 4)  ||
-        (MALLOC_ALIGNMENT < (size_t)8U) ||
-        ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
-        ((MCHUNK_SIZE      & (MCHUNK_SIZE-SIZE_T_ONE))      != 0) ||
-        ((gsize            & (gsize-SIZE_T_ONE))            != 0) ||
-        ((psize            & (psize-SIZE_T_ONE))            != 0))
-      ABORT;
-
-    mparams.granularity = gsize;
-    mparams.page_size = psize;
-    mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
-    mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
-#if MORECORE_CONTIGUOUS
-    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
-#else  /* MORECORE_CONTIGUOUS */
-    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
-#endif /* MORECORE_CONTIGUOUS */
-
-#if !ONLY_MSPACES
-    /* Set up lock for main malloc area */
-    gm->mflags = mparams.default_mflags;
-    INITIAL_LOCK(&gm->mutex);
-#endif
-
-    {
-#if USE_DEV_RANDOM
-      int fd;
-      unsigned char buf[sizeof(size_t)];
-      /* Try to use /dev/urandom, else fall back on using time */
-      if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 &&
-          read(fd, buf, sizeof(buf)) == sizeof(buf)) {
-        magic = *((size_t *) buf);
-        close(fd);
-      }
-      else
-#endif /* USE_DEV_RANDOM */
-#ifdef WIN32
-        magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
-#else
-        magic = (size_t)(time(0) ^ (size_t)0x55555555U);
-#endif
-      magic |= (size_t)8U;    /* ensure nonzero */
-      magic &= ~(size_t)7U;   /* improve chances of fault for bad values */
-      mparams.magic = magic;
-    }
-  }
-
-  RELEASE_MALLOC_GLOBAL_LOCK();
-  return 1;
-}
-
-/* support for mallopt */
-static int change_mparam(int param_number, int value) {
-  size_t val;
-  ensure_initialization();
-  val = (value == -1)? MAX_SIZE_T : (size_t)value;
-  switch(param_number) {
-  case M_TRIM_THRESHOLD:
-    mparams.trim_threshold = val;
-    return 1;
-  case M_GRANULARITY:
-    if (val >= mparams.page_size && ((val & (val-1)) == 0)) {
-      mparams.granularity = val;
-      return 1;
-    }
-    else
-      return 0;
-  case M_MMAP_THRESHOLD:
-    mparams.mmap_threshold = val;
-    return 1;
-  default:
-    return 0;
-  }
-}
-
-#if DEBUG
-/* ------------------------- Debugging Support --------------------------- */
-
-/* Check properties of any chunk, whether free, inuse, mmapped etc  */
-static void do_check_any_chunk(mstate m, mchunkptr p) {
-  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
-  assert(ok_address(m, p));
-}
-
-/* Check properties of top chunk */
-static void do_check_top_chunk(mstate m, mchunkptr p) {
-  msegmentptr sp = segment_holding(m, (char*)p);
-  size_t  sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */
-  assert(sp != 0);
-  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
-  assert(ok_address(m, p));
-  assert(sz == m->topsize);
-  assert(sz > 0);
-  assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
-  assert(pinuse(p));
-  assert(!pinuse(chunk_plus_offset(p, sz)));
-}
-
-/* Check properties of (inuse) mmapped chunks */
-static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
-  size_t  sz = chunksize(p);
-  size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD);
-  assert(is_mmapped(p));
-  assert(use_mmap(m));
-  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
-  assert(ok_address(m, p));
-  assert(!is_small(sz));
-  assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
-  assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
-  assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
-}
-
-/* Check properties of inuse chunks */
-static void do_check_inuse_chunk(mstate m, mchunkptr p) {
-  do_check_any_chunk(m, p);
-  assert(is_inuse(p));
-  assert(next_pinuse(p));
-  /* If not pinuse and not mmapped, previous chunk has OK offset */
-  assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
-  if (is_mmapped(p))
-    do_check_mmapped_chunk(m, p);
-}
-
-/* Check properties of free chunks */
-static void do_check_free_chunk(mstate m, mchunkptr p) {
-  size_t sz = chunksize(p);
-  mchunkptr next = chunk_plus_offset(p, sz);
-  do_check_any_chunk(m, p);
-  assert(!is_inuse(p));
-  assert(!next_pinuse(p));
-  assert (!is_mmapped(p));
-  if (p != m->dv && p != m->top) {
-    if (sz >= MIN_CHUNK_SIZE) {
-      assert((sz & CHUNK_ALIGN_MASK) == 0);
-      assert(is_aligned(chunk2mem(p)));
-      assert(next->prev_foot == sz);
-      assert(pinuse(p));
-      assert (next == m->top || is_inuse(next));
-      assert(p->fd->bk == p);
-      assert(p->bk->fd == p);
-    }
-    else  /* markers are always of size SIZE_T_SIZE */
-      assert(sz == SIZE_T_SIZE);
-  }
-}
-
-/* Check properties of malloced chunks at the point they are malloced */
-static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
-  if (mem != 0) {
-    mchunkptr p = mem2chunk(mem);
-    size_t sz = p->head & ~INUSE_BITS;
-    do_check_inuse_chunk(m, p);
-    assert((sz & CHUNK_ALIGN_MASK) == 0);
-    assert(sz >= MIN_CHUNK_SIZE);
-    assert(sz >= s);
-    /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
-    assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
-  }
-}
-
-/* Check a tree and its subtrees.  */
-static void do_check_tree(mstate m, tchunkptr t) {
-  tchunkptr head = 0;
-  tchunkptr u = t;
-  bindex_t tindex = t->index;
-  size_t tsize = chunksize(t);
-  bindex_t idx;
-  compute_tree_index(tsize, idx);
-  assert(tindex == idx);
-  assert(tsize >= MIN_LARGE_SIZE);
-  assert(tsize >= minsize_for_tree_index(idx));
-  assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
-
-  do { /* traverse through chain of same-sized nodes */
-    do_check_any_chunk(m, ((mchunkptr)u));
-    assert(u->index == tindex);
-    assert(chunksize(u) == tsize);
-    assert(!is_inuse(u));
-    assert(!next_pinuse(u));
-    assert(u->fd->bk == u);
-    assert(u->bk->fd == u);
-    if (u->parent == 0) {
-      assert(u->child[0] == 0);
-      assert(u->child[1] == 0);
-    }
-    else {
-      assert(head == 0); /* only one node on chain has parent */
-      head = u;
-      assert(u->parent != u);
-      assert (u->parent->child[0] == u ||
-              u->parent->child[1] == u ||
-              *((tbinptr*)(u->parent)) == u);
-      if (u->child[0] != 0) {
-        assert(u->child[0]->parent == u);
-        assert(u->child[0] != u);
-        do_check_tree(m, u->child[0]);
-      }
-      if (u->child[1] != 0) {
-        assert(u->child[1]->parent == u);
-        assert(u->child[1] != u);
-        do_check_tree(m, u->child[1]);
-      }
-      if (u->child[0] != 0 && u->child[1] != 0) {
-        assert(chunksize(u->child[0]) < chunksize(u->child[1]));
-      }
-    }
-    u = u->fd;
-  } while (u != t);
-  assert(head != 0);
-}
-
-/*  Check all the chunks in a treebin.  */
-static void do_check_treebin(mstate m, bindex_t i) {
-  tbinptr* tb = treebin_at(m, i);
-  tchunkptr t = *tb;
-  int empty = (m->treemap & (1U << i)) == 0;
-  if (t == 0)
-    assert(empty);
-  if (!empty)
-    do_check_tree(m, t);
-}
-
-/*  Check all the chunks in a smallbin.  */
-static void do_check_smallbin(mstate m, bindex_t i) {
-  sbinptr b = smallbin_at(m, i);
-  mchunkptr p = b->bk;
-  unsigned int empty = (m->smallmap & (1U << i)) == 0;
-  if (p == b)
-    assert(empty);
-  if (!empty) {
-    for (; p != b; p = p->bk) {
-      size_t size = chunksize(p);
-      mchunkptr q;
-      /* each chunk claims to be free */
-      do_check_free_chunk(m, p);
-      /* chunk belongs in bin */
-      assert(small_index(size) == i);
-      assert(p->bk == b || chunksize(p->bk) == chunksize(p));
-      /* chunk is followed by an inuse chunk */
-      q = next_chunk(p);
-      if (q->head != FENCEPOST_HEAD)
-        do_check_inuse_chunk(m, q);
-    }
-  }
-}
-
-/* Find x in a bin. Used in other check functions. */
-static int bin_find(mstate m, mchunkptr x) {
-  size_t size = chunksize(x);
-  if (is_small(size)) {
-    bindex_t sidx = small_index(size);
-    sbinptr b = smallbin_at(m, sidx);
-    if (smallmap_is_marked(m, sidx)) {
-      mchunkptr p = b;
-      do {
-        if (p == x)
-          return 1;
-      } while ((p = p->fd) != b);
-    }
-  }
-  else {
-    bindex_t tidx;
-    compute_tree_index(size, tidx);
-    if (treemap_is_marked(m, tidx)) {
-      tchunkptr t = *treebin_at(m, tidx);
-      size_t sizebits = size << leftshift_for_tree_index(tidx);
-      while (t != 0 && chunksize(t) != size) {
-        t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
-        sizebits <<= 1;
-      }
-      if (t != 0) {
-        tchunkptr u = t;
-        do {
-          if (u == (tchunkptr)x)
-            return 1;
-        } while ((u = u->fd) != t);
-      }
-    }
-  }
-  return 0;
-}
-
-/* Traverse each chunk and check it; return total */
-static size_t traverse_and_check(mstate m) {
-  size_t sum = 0;
-  if (is_initialized(m)) {
-    msegmentptr s = &m->seg;
-    sum += m->topsize + TOP_FOOT_SIZE;
-    while (s != 0) {
-      mchunkptr q = align_as_chunk(s->base);
-      mchunkptr lastq = 0;
-      assert(pinuse(q));
-      while (segment_holds(s, q) &&
-             q != m->top && q->head != FENCEPOST_HEAD) {
-        sum += chunksize(q);
-        if (is_inuse(q)) {
-          assert(!bin_find(m, q));
-          do_check_inuse_chunk(m, q);
-        }
-        else {
-          assert(q == m->dv || bin_find(m, q));
-          assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */
-          do_check_free_chunk(m, q);
-        }
-        lastq = q;
-        q = next_chunk(q);
-      }
-      s = s->next;
-    }
-  }
-  return sum;
-}
-
-/* Check all properties of malloc_state. */
-static void do_check_malloc_state(mstate m) {
-  bindex_t i;
-  size_t total;
-  /* check bins */
-  for (i = 0; i < NSMALLBINS; ++i)
-    do_check_smallbin(m, i);
-  for (i = 0; i < NTREEBINS; ++i)
-    do_check_treebin(m, i);
-
-  if (m->dvsize != 0) { /* check dv chunk */
-    do_check_any_chunk(m, m->dv);
-    assert(m->dvsize == chunksize(m->dv));
-    assert(m->dvsize >= MIN_CHUNK_SIZE);
-    assert(bin_find(m, m->dv) == 0);
-  }
-
-  if (m->top != 0) {   /* check top chunk */
-    do_check_top_chunk(m, m->top);
-    /*assert(m->topsize == chunksize(m->top)); redundant */
-    assert(m->topsize > 0);
-    assert(bin_find(m, m->top) == 0);
-  }
-
-  total = traverse_and_check(m);
-  assert(total <= m->footprint);
-  assert(m->footprint <= m->max_footprint);
-}
-#endif /* DEBUG */
-
-/* ----------------------------- statistics ------------------------------ */
-
-#if !NO_MALLINFO
-static struct mallinfo internal_mallinfo(mstate m) {
-  struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-  ensure_initialization();
-  if (!PREACTION(m)) {
-    check_malloc_state(m);
-    if (is_initialized(m)) {
-      size_t nfree = SIZE_T_ONE; /* top always free */
-      size_t mfree = m->topsize + TOP_FOOT_SIZE;
-      size_t sum = mfree;
-      msegmentptr s = &m->seg;
-      while (s != 0) {
-        mchunkptr q = align_as_chunk(s->base);
-        while (segment_holds(s, q) &&
-               q != m->top && q->head != FENCEPOST_HEAD) {
-          size_t sz = chunksize(q);
-          sum += sz;
-          if (!is_inuse(q)) {
-            mfree += sz;
-            ++nfree;
-          }
-          q = next_chunk(q);
-        }
-        s = s->next;
-      }
-
-      nm.arena    = sum;
-      nm.ordblks  = nfree;
-      nm.hblkhd   = m->footprint - sum;
-      nm.usmblks  = m->max_footprint;
-      nm.uordblks = m->footprint - mfree;
-      nm.fordblks = mfree;
-      nm.keepcost = m->topsize;
-    }
-
-    POSTACTION(m);
-  }
-  return nm;
-}
-#endif /* !NO_MALLINFO */
-
-static void internal_malloc_stats(mstate m) {
-  ensure_initialization();
-  if (!PREACTION(m)) {
-    size_t maxfp = 0;
-    size_t fp = 0;
-    size_t used = 0;
-    check_malloc_state(m);
-    if (is_initialized(m)) {
-      msegmentptr s = &m->seg;
-      maxfp = m->max_footprint;
-      fp = m->footprint;
-      used = fp - (m->topsize + TOP_FOOT_SIZE);
-
-      while (s != 0) {
-        mchunkptr q = align_as_chunk(s->base);
-        while (segment_holds(s, q) &&
-               q != m->top && q->head != FENCEPOST_HEAD) {
-          if (!is_inuse(q))
-            used -= chunksize(q);
-          q = next_chunk(q);
-        }
-        s = s->next;
-      }
-    }
-
-    fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
-    fprintf(stderr, "system bytes     = %10lu\n", (unsigned long)(fp));
-    fprintf(stderr, "in use bytes     = %10lu\n", (unsigned long)(used));
-
-    POSTACTION(m);
-  }
-}
-
-/* ----------------------- Operations on smallbins ----------------------- */
-
-/*
-  Various forms of linking and unlinking are defined as macros.  Even
-  the ones for trees, which are very long but have very short typical
-  paths.  This is ugly but reduces reliance on inlining support of
-  compilers.
-*/
-
-/* Link a free chunk into a smallbin  */
-#define insert_small_chunk(M, P, S) {\
-  bindex_t I  = small_index(S);\
-  mchunkptr B = smallbin_at(M, I);\
-  mchunkptr F = B;\
-  assert(S >= MIN_CHUNK_SIZE);\
-  if (!smallmap_is_marked(M, I))\
-    mark_smallmap(M, I);\
-  else if (RTCHECK(ok_address(M, B->fd)))\
-    F = B->fd;\
-  else {\
-    CORRUPTION_ERROR_ACTION(M);\
-  }\
-  B->fd = P;\
-  F->bk = P;\
-  P->fd = F;\
-  P->bk = B;\
-}
-
-/* Unlink a chunk from a smallbin  */
-#define unlink_small_chunk(M, P, S) {\
-  mchunkptr F = P->fd;\
-  mchunkptr B = P->bk;\
-  bindex_t I = small_index(S);\
-  assert(P != B);\
-  assert(P != F);\
-  assert(chunksize(P) == small_index2size(I));\
-  if (F == B)\
-    clear_smallmap(M, I);\
-  else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
-                   (B == smallbin_at(M,I) || ok_address(M, B)))) {\
-    F->bk = B;\
-    B->fd = F;\
-  }\
-  else {\
-    CORRUPTION_ERROR_ACTION(M);\
-  }\
-}
-
-/* Unlink the first chunk from a smallbin */
-#define unlink_first_small_chunk(M, B, P, I) {\
-  mchunkptr F = P->fd;\
-  assert(P != B);\
-  assert(P != F);\
-  assert(chunksize(P) == small_index2size(I));\
-  if (B == F)\
-    clear_smallmap(M, I);\
-  else if (RTCHECK(ok_address(M, F))) {\
-    B->fd = F;\
-    F->bk = B;\
-  }\
-  else {\
-    CORRUPTION_ERROR_ACTION(M);\
-  }\
-}
-
-
-
-/* Replace dv node, binning the old one */
-/* Used only when dvsize known to be small */
-#define replace_dv(M, P, S) {\
-  size_t DVS = M->dvsize;\
-  if (DVS != 0) {\
-    mchunkptr DV = M->dv;\
-    assert(is_small(DVS));\
-    insert_small_chunk(M, DV, DVS);\
-  }\
-  M->dvsize = S;\
-  M->dv = P;\
-}
-
-/* ------------------------- Operations on trees ------------------------- */
-
-/* Insert chunk into tree */
-#define insert_large_chunk(M, X, S) {\
-  tbinptr* H;\
-  bindex_t I;\
-  compute_tree_index(S, I);\
-  H = treebin_at(M, I);\
-  X->index = I;\
-  X->child[0] = X->child[1] = 0;\
-  if (!treemap_is_marked(M, I)) {\
-    mark_treemap(M, I);\
-    *H = X;\
-    X->parent = (tchunkptr)H;\
-    X->fd = X->bk = X;\
-  }\
-  else {\
-    tchunkptr T = *H;\
-    size_t K = S << leftshift_for_tree_index(I);\
-    for (;;) {\
-      if (chunksize(T) != S) {\
-        tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
-        K <<= 1;\
-        if (*C != 0)\
-          T = *C;\
-        else if (RTCHECK(ok_address(M, C))) {\
-          *C = X;\
-          X->parent = T;\
-          X->fd = X->bk = X;\
-          break;\
-        }\
-        else {\
-          CORRUPTION_ERROR_ACTION(M);\
-          break;\
-        }\
-      }\
-      else {\
-        tchunkptr F = T->fd;\
-        if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
-          T->fd = F->bk = X;\
-          X->fd = F;\
-          X->bk = T;\
-          X->parent = 0;\
-          break;\
-        }\
-        else {\
-          CORRUPTION_ERROR_ACTION(M);\
-          break;\
-        }\
-      }\
-    }\
-  }\
-}
-
-/*
-  Unlink steps:
-
-  1. If x is a chained node, unlink it from its same-sized fd/bk links
-     and choose its bk node as its replacement.
-  2. If x was the last node of its size, but not a leaf node, it must
-     be replaced with a leaf node (not merely one with an open left or
-     right), to make sure that lefts and rights of descendents
-     correspond properly to bit masks.  We use the rightmost descendent
-     of x.  We could use any other leaf, but this is easy to locate and
-     tends to counteract removal of leftmosts elsewhere, and so keeps
-     paths shorter than minimally guaranteed.  This doesn't loop much
-     because on average a node in a tree is near the bottom.
-  3. If x is the base of a chain (i.e., has parent links) relink
-     x's parent and children to x's replacement (or null if none).
-*/
-
-#define unlink_large_chunk(M, X) {\
-  tchunkptr XP = X->parent;\
-  tchunkptr R;\
-  if (X->bk != X) {\
-    tchunkptr F = X->fd;\
-    R = X->bk;\
-    if (RTCHECK(ok_address(M, F))) {\
-      F->bk = R;\
-      R->fd = F;\
-    }\
-    else {\
-      CORRUPTION_ERROR_ACTION(M);\
-    }\
-  }\
-  else {\
-    tchunkptr* RP;\
-    if (((R = *(RP = &(X->child[1]))) != 0) ||\
-        ((R = *(RP = &(X->child[0]))) != 0)) {\
-      tchunkptr* CP;\
-      while ((*(CP = &(R->child[1])) != 0) ||\
-             (*(CP = &(R->child[0])) != 0)) {\
-        R = *(RP = CP);\
-      }\
-      if (RTCHECK(ok_address(M, RP)))\
-        *RP = 0;\
-      else {\
-        CORRUPTION_ERROR_ACTION(M);\
-      }\
-    }\
-  }\
-  if (XP != 0) {\
-    tbinptr* H = treebin_at(M, X->index);\
-    if (X == *H) {\
-      if ((*H = R) == 0) \
-        clear_treemap(M, X->index);\
-    }\
-    else if (RTCHECK(ok_address(M, XP))) {\
-      if (XP->child[0] == X) \
-        XP->child[0] = R;\
-      else \
-        XP->child[1] = R;\
-    }\
-    else\
-      CORRUPTION_ERROR_ACTION(M);\
-    if (R != 0) {\
-      if (RTCHECK(ok_address(M, R))) {\
-        tchunkptr C0, C1;\
-        R->parent = XP;\
-        if ((C0 = X->child[0]) != 0) {\
-          if (RTCHECK(ok_address(M, C0))) {\
-            R->child[0] = C0;\
-            C0->parent = R;\
-          }\
-          else\
-            CORRUPTION_ERROR_ACTION(M);\
-        }\
-        if ((C1 = X->child[1]) != 0) {\
-          if (RTCHECK(ok_address(M, C1))) {\
-            R->child[1] = C1;\
-            C1->parent = R;\
-          }\
-          else\
-            CORRUPTION_ERROR_ACTION(M);\
-        }\
-      }\
-      else\
-        CORRUPTION_ERROR_ACTION(M);\
-    }\
-  }\
-}
-
-/* Relays to large vs small bin operations */
-
-#define insert_chunk(M, P, S)\
-  if (is_small(S)) insert_small_chunk(M, P, S)\
-  else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
-
-#define unlink_chunk(M, P, S)\
-  if (is_small(S)) unlink_small_chunk(M, P, S)\
-  else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
-
-
-/* Relays to internal calls to malloc/free from realloc, memalign etc */
-
-#if ONLY_MSPACES
-#define internal_malloc(m, b) mspace_malloc(m, b)
-#define internal_free(m, mem) mspace_free(m,mem);
-#else /* ONLY_MSPACES */
-#if MSPACES
-#define internal_malloc(m, b)\
-   (m == gm)? dlmalloc(b) : mspace_malloc(m, b)
-#define internal_free(m, mem)\
-   if (m == gm) dlfree(mem); else mspace_free(m,mem);
-#else /* MSPACES */
-#define internal_malloc(m, b) dlmalloc(b)
-#define internal_free(m, mem) dlfree(mem)
-#endif /* MSPACES */
-#endif /* ONLY_MSPACES */
-
-/* -----------------------  Direct-mmapping chunks ----------------------- */
-
-/*
-  Directly mmapped chunks are set up with an offset to the start of
-  the mmapped region stored in the prev_foot field of the chunk. This
-  allows reconstruction of the required argument to MUNMAP when freed,
-  and also allows adjustment of the returned chunk to meet alignment
-  requirements (especially in memalign).
-*/
-
-/* Malloc using mmap */
-static void* mmap_alloc(mstate m, size_t nb) {
-  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
-  if (mmsize > nb) {     /* Check for wrap around 0 */
-    char* mm = (char*)(CALL_DIRECT_MMAP(mmsize));
-    if (mm != CMFAIL) {
-      size_t offset = align_offset(chunk2mem(mm));
-      size_t psize = mmsize - offset - MMAP_FOOT_PAD;
-      mchunkptr p = (mchunkptr)(mm + offset);
-      p->prev_foot = offset;
-      p->head = psize;
-      mark_inuse_foot(m, p, psize);
-      chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
-      chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
-
-      if (m->least_addr == 0 || mm < m->least_addr)
-        m->least_addr = mm;
-      if ((m->footprint += mmsize) > m->max_footprint)
-        m->max_footprint = m->footprint;
-      assert(is_aligned(chunk2mem(p)));
-      check_mmapped_chunk(m, p);
-      return chunk2mem(p);
-    }
-  }
-  return 0;
-}
-
-/* Realloc using mmap */
-static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) {
-  size_t oldsize = chunksize(oldp);
-  if (is_small(nb)) /* Can't shrink mmap regions below small size */
-    return 0;
-  /* Keep old chunk if big enough but not too big */
-  if (oldsize >= nb + SIZE_T_SIZE &&
-      (oldsize - nb) <= (mparams.granularity << 1))
-    return oldp;
-  else {
-    size_t offset = oldp->prev_foot;
-    size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
-    size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
-    char* cp = (char*)CALL_MREMAP((char*)oldp - offset,
-                                  oldmmsize, newmmsize, 1);
-    if (cp != CMFAIL) {
-      mchunkptr newp = (mchunkptr)(cp + offset);
-      size_t psize = newmmsize - offset - MMAP_FOOT_PAD;
-      newp->head = psize;
-      mark_inuse_foot(m, newp, psize);
-      chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
-      chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
-
-      if (cp < m->least_addr)
-        m->least_addr = cp;
-      if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
-        m->max_footprint = m->footprint;
-      check_mmapped_chunk(m, newp);
-      return newp;
-    }
-  }
-  return 0;
-}
-
-/* -------------------------- mspace management -------------------------- */
-
-/* Initialize top chunk and its size */
-static void init_top(mstate m, mchunkptr p, size_t psize) {
-  /* Ensure alignment */
-  size_t offset = align_offset(chunk2mem(p));
-  p = (mchunkptr)((char*)p + offset);
-  psize -= offset;
-
-  m->top = p;
-  m->topsize = psize;
-  p->head = psize | PINUSE_BIT;
-  /* set size of fake trailing chunk holding overhead space only once */
-  chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
-  m->trim_check = mparams.trim_threshold; /* reset on each update */
-}
-
-/* Initialize bins for a new mstate that is otherwise zeroed out */
-static void init_bins(mstate m) {
-  /* Establish circular links for smallbins */
-  bindex_t i;
-  for (i = 0; i < NSMALLBINS; ++i) {
-    sbinptr bin = smallbin_at(m,i);
-    bin->fd = bin->bk = bin;
-  }
-}
-
-#if PROCEED_ON_ERROR
-
-/* default corruption action */
-static void reset_on_error(mstate m) {
-  int i;
-  ++malloc_corruption_error_count;
-  /* Reinitialize fields to forget about all memory */
-  m->smallbins = m->treebins = 0;
-  m->dvsize = m->topsize = 0;
-  m->seg.base = 0;
-  m->seg.size = 0;
-  m->seg.next = 0;
-  m->top = m->dv = 0;
-  for (i = 0; i < NTREEBINS; ++i)
-    *treebin_at(m, i) = 0;
-  init_bins(m);
-}
-#endif /* PROCEED_ON_ERROR */
-
-/* Allocate chunk and prepend remainder with chunk in successor base. */
-static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
-                           size_t nb) {
-  mchunkptr p = align_as_chunk(newbase);
-  mchunkptr oldfirst = align_as_chunk(oldbase);
-  size_t psize = (char*)oldfirst - (char*)p;
-  mchunkptr q = chunk_plus_offset(p, nb);
-  size_t qsize = psize - nb;
-  set_size_and_pinuse_of_inuse_chunk(m, p, nb);
-
-  assert((char*)oldfirst > (char*)q);
-  assert(pinuse(oldfirst));
-  assert(qsize >= MIN_CHUNK_SIZE);
-
-  /* consolidate remainder with first chunk of old base */
-  if (oldfirst == m->top) {
-    size_t tsize = m->topsize += qsize;
-    m->top = q;
-    q->head = tsize | PINUSE_BIT;
-    check_top_chunk(m, q);
-  }
-  else if (oldfirst == m->dv) {
-    size_t dsize = m->dvsize += qsize;
-    m->dv = q;
-    set_size_and_pinuse_of_free_chunk(q, dsize);
-  }
-  else {
-    if (!is_inuse(oldfirst)) {
-      size_t nsize = chunksize(oldfirst);
-      unlink_chunk(m, oldfirst, nsize);
-      oldfirst = chunk_plus_offset(oldfirst, nsize);
-      qsize += nsize;
-    }
-    set_free_with_pinuse(q, qsize, oldfirst);
-    insert_chunk(m, q, qsize);
-    check_free_chunk(m, q);
-  }
-
-  check_malloced_chunk(m, chunk2mem(p), nb);
-  return chunk2mem(p);
-}
-
-/* Add a segment to hold a new noncontiguous region */
-static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
-  /* Determine locations and sizes of segment, fenceposts, old top */
-  char* old_top = (char*)m->top;
-  msegmentptr oldsp = segment_holding(m, old_top);
-  char* old_end = oldsp->base + oldsp->size;
-  size_t ssize = pad_request(sizeof(struct malloc_segment));
-  char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
-  size_t offset = align_offset(chunk2mem(rawsp));
-  char* asp = rawsp + offset;
-  char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
-  mchunkptr sp = (mchunkptr)csp;
-  msegmentptr ss = (msegmentptr)(chunk2mem(sp));
-  mchunkptr tnext = chunk_plus_offset(sp, ssize);
-  mchunkptr p = tnext;
-  int nfences = 0;
-
-  /* reset top to new space */
-  init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
-
-  /* Set up segment record */
-  assert(is_aligned(ss));
-  set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
-  *ss = m->seg; /* Push current record */
-  m->seg.base = tbase;
-  m->seg.size = tsize;
-  m->seg.sflags = mmapped;
-  m->seg.next = ss;
-
-  /* Insert trailing fenceposts */
-  for (;;) {
-    mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
-    p->head = FENCEPOST_HEAD;
-    ++nfences;
-    if ((char*)(&(nextp->head)) < old_end)
-      p = nextp;
-    else
-      break;
-  }
-  assert(nfences >= 2);
-
-  /* Insert the rest of old top into a bin as an ordinary free chunk */
-  if (csp != old_top) {
-    mchunkptr q = (mchunkptr)old_top;
-    size_t psize = csp - old_top;
-    mchunkptr tn = chunk_plus_offset(q, psize);
-    set_free_with_pinuse(q, psize, tn);
-    insert_chunk(m, q, psize);
-  }
-
-  check_top_chunk(m, m->top);
-}
-
-/* -------------------------- System allocation -------------------------- */
-
-/* Get memory from system using MORECORE or MMAP */
-static void* sys_alloc(mstate m, size_t nb) {
-  char* tbase = CMFAIL;
-  size_t tsize = 0;
-  flag_t mmap_flag = 0;
-
-  ensure_initialization();
-
-  /* Directly map large chunks, but only if already initialized */
-  if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
-    void* mem = mmap_alloc(m, nb);
-    if (mem != 0)
-      return mem;
-  }
-
-  /*
-    Try getting memory in any of three ways (in most-preferred to
-    least-preferred order):
-    1. A call to MORECORE that can normally contiguously extend memory.
-       (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
-       or main space is mmapped or a previous contiguous call failed)
-    2. A call to MMAP new space (disabled if not HAVE_MMAP).
-       Note that under the default settings, if MORECORE is unable to
-       fulfill a request, and HAVE_MMAP is true, then mmap is
-       used as a noncontiguous system allocator. This is a useful backup
-       strategy for systems with holes in address spaces -- in this case
-       sbrk cannot contiguously expand the heap, but mmap may be able to
-       find space.
-    3. A call to MORECORE that cannot usually contiguously extend memory.
-       (disabled if not HAVE_MORECORE)
-
-   In all cases, we need to request enough bytes from system to ensure
-   we can malloc nb bytes upon success, so pad with enough space for
-   top_foot, plus alignment-pad to make sure we don't lose bytes if
-   not on boundary, and round this up to a granularity unit.
-  */
-
-  if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
-    char* br = CMFAIL;
-    msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
-    size_t asize = 0;
-    ACQUIRE_MALLOC_GLOBAL_LOCK();
-
-    if (ss == 0) {  /* First time through or recovery */
-      char* base = (char*)CALL_MORECORE(0);
-      if (base != CMFAIL) {
-        asize = granularity_align(nb + SYS_ALLOC_PADDING);
-        /* Adjust to end on a page boundary */
-        if (!is_page_aligned(base))
-          asize += (page_align((size_t)base) - (size_t)base);
-        /* Can't call MORECORE if size is negative when treated as signed */
-        if (asize < HALF_MAX_SIZE_T &&
-            (br = (char*)(CALL_MORECORE(asize))) == base) {
-          tbase = base;
-          tsize = asize;
-        }
-      }
-    }
-    else {
-      /* Subtract out existing available top space from MORECORE request. */
-      asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
-      /* Use mem here only if it did continuously extend old space */
-      if (asize < HALF_MAX_SIZE_T &&
-          (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
-        tbase = br;
-        tsize = asize;
-      }
-    }
-
-    if (tbase == CMFAIL) {    /* Cope with partial failure */
-      if (br != CMFAIL) {    /* Try to use/extend the space we did get */
-        if (asize < HALF_MAX_SIZE_T &&
-            asize < nb + SYS_ALLOC_PADDING) {
-          size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize);
-          if (esize < HALF_MAX_SIZE_T) {
-            char* end = (char*)CALL_MORECORE(esize);
-            if (end != CMFAIL)
-              asize += esize;
-            else {            /* Can't use; try to release */
-              (void) CALL_MORECORE(-asize);
-              br = CMFAIL;
-            }
-          }
-        }
-      }
-      if (br != CMFAIL) {    /* Use the space we did get */
-        tbase = br;
-        tsize = asize;
-      }
-      else
-        disable_contiguous(m); /* Don't try contiguous path in the future */
-    }
-
-    RELEASE_MALLOC_GLOBAL_LOCK();
-  }
-
-  if (HAVE_MMAP && tbase == CMFAIL) {  /* Try MMAP */
-    size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING);
-    if (rsize > nb) { /* Fail if wraps around zero */
-      char* mp = (char*)(CALL_MMAP(rsize));
-      if (mp != CMFAIL) {
-        tbase = mp;
-        tsize = rsize;
-        mmap_flag = USE_MMAP_BIT;
-      }
-    }
-  }
-
-  if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
-    size_t asize = granularity_align(nb + SYS_ALLOC_PADDING);
-    if (asize < HALF_MAX_SIZE_T) {
-      char* br = CMFAIL;
-      char* end = CMFAIL;
-      ACQUIRE_MALLOC_GLOBAL_LOCK();
-      br = (char*)(CALL_MORECORE(asize));
-      end = (char*)(CALL_MORECORE(0));
-      RELEASE_MALLOC_GLOBAL_LOCK();
-      if (br != CMFAIL && end != CMFAIL && br < end) {
-        size_t ssize = end - br;
-        if (ssize > nb + TOP_FOOT_SIZE) {
-          tbase = br;
-          tsize = ssize;
-        }
-      }
-    }
-  }
-
-  if (tbase != CMFAIL) {
-
-    if ((m->footprint += tsize) > m->max_footprint)
-      m->max_footprint = m->footprint;
-
-    if (!is_initialized(m)) { /* first-time initialization */
-      if (m->least_addr == 0 || tbase < m->least_addr)
-        m->least_addr = tbase;
-      m->seg.base = tbase;
-      m->seg.size = tsize;
-      m->seg.sflags = mmap_flag;
-      m->magic = mparams.magic;
-      m->release_checks = MAX_RELEASE_CHECK_RATE;
-      init_bins(m);
-#if !ONLY_MSPACES
-      if (is_global(m))
-        init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
-      else
-#endif
-      {
-        /* Offset top by embedded malloc_state */
-        mchunkptr mn = next_chunk(mem2chunk(m));
-        init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
-      }
-    }
-
-    else {
-      /* Try to merge with an existing segment */
-      msegmentptr sp = &m->seg;
-      /* Only consider most recent segment if traversal suppressed */
-      while (sp != 0 && tbase != sp->base + sp->size)
-        sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
-      if (sp != 0 &&
-          !is_extern_segment(sp) &&
-          (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
-          segment_holds(sp, m->top)) { /* append */
-        sp->size += tsize;
-        init_top(m, m->top, m->topsize + tsize);
-      }
-      else {
-        if (tbase < m->least_addr)
-          m->least_addr = tbase;
-        sp = &m->seg;
-        while (sp != 0 && sp->base != tbase + tsize)
-          sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
-        if (sp != 0 &&
-            !is_extern_segment(sp) &&
-            (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
-          char* oldbase = sp->base;
-          sp->base = tbase;
-          sp->size += tsize;
-          return prepend_alloc(m, tbase, oldbase, nb);
-        }
-        else
-          add_segment(m, tbase, tsize, mmap_flag);
-      }
-    }
-
-    if (nb < m->topsize) { /* Allocate from new or extended top space */
-      size_t rsize = m->topsize -= nb;
-      mchunkptr p = m->top;
-      mchunkptr r = m->top = chunk_plus_offset(p, nb);
-      r->head = rsize | PINUSE_BIT;
-      set_size_and_pinuse_of_inuse_chunk(m, p, nb);
-      check_top_chunk(m, m->top);
-      check_malloced_chunk(m, chunk2mem(p), nb);
-      return chunk2mem(p);
-    }
-  }
-
-  MALLOC_FAILURE_ACTION;
-  return 0;
-}
-
-/* -----------------------  system deallocation -------------------------- */
-
-/* Unmap and unlink any mmapped segments that don't contain used chunks */
-static size_t release_unused_segments(mstate m) {
-  size_t released = 0;
-  int nsegs = 0;
-  msegmentptr pred = &m->seg;
-  msegmentptr sp = pred->next;
-  while (sp != 0) {
-    char* base = sp->base;
-    size_t size = sp->size;
-    msegmentptr next = sp->next;
-    ++nsegs;
-    if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
-      mchunkptr p = align_as_chunk(base);
-      size_t psize = chunksize(p);
-      /* Can unmap if first chunk holds entire segment and not pinned */
-      if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
-        tchunkptr tp = (tchunkptr)p;
-        assert(segment_holds(sp, (char*)sp));
-        if (p == m->dv) {
-          m->dv = 0;
-          m->dvsize = 0;
-        }
-        else {
-          unlink_large_chunk(m, tp);
-        }
-        if (CALL_MUNMAP(base, size) == 0) {
-          released += size;
-          m->footprint -= size;
-          /* unlink obsoleted record */
-          sp = pred;
-          sp->next = next;
-        }
-        else { /* back out if cannot unmap */
-          insert_large_chunk(m, tp, psize);
-        }
-      }
-    }
-    if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
-      break;
-    pred = sp;
-    sp = next;
-  }
-  /* Reset check counter */
-  m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)?
-                       nsegs : MAX_RELEASE_CHECK_RATE);
-  return released;
-}
-
-static int sys_trim(mstate m, size_t pad) {
-  size_t released = 0;
-  ensure_initialization();
-  if (pad < MAX_REQUEST && is_initialized(m)) {
-    pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
-
-    if (m->topsize > pad) {
-      /* Shrink top space in granularity-size units, keeping at least one */
-      size_t unit = mparams.granularity;
-      size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
-                      SIZE_T_ONE) * unit;
-      msegmentptr sp = segment_holding(m, (char*)m->top);
-
-      if (!is_extern_segment(sp)) {
-        if (is_mmapped_segment(sp)) {
-          if (HAVE_MMAP &&
-              sp->size >= extra &&
-              !has_segment_link(m, sp)) { /* can't shrink if pinned */
-            size_t newsize = sp->size - extra;
-            /* Prefer mremap, fall back to munmap */
-            if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
-                (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
-              released = extra;
-            }
-          }
-        }
-        else if (HAVE_MORECORE) {
-          if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
-            extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
-          ACQUIRE_MALLOC_GLOBAL_LOCK();
-          {
-            /* Make sure end of memory is where we last set it. */
-            char* old_br = (char*)(CALL_MORECORE(0));
-            if (old_br == sp->base + sp->size) {
-              char* rel_br = (char*)(CALL_MORECORE(-extra));
-              char* new_br = (char*)(CALL_MORECORE(0));
-              if (rel_br != CMFAIL && new_br < old_br)
-                released = old_br - new_br;
-            }
-          }
-          RELEASE_MALLOC_GLOBAL_LOCK();
-        }
-      }
-
-      if (released != 0) {
-        sp->size -= released;
-        m->footprint -= released;
-        init_top(m, m->top, m->topsize - released);
-        check_top_chunk(m, m->top);
-      }
-    }
-
-    /* Unmap any unused mmapped segments */
-    if (HAVE_MMAP)
-      released += release_unused_segments(m);
-
-    /* On failure, disable autotrim to avoid repeated failed future calls */
-    if (released == 0 && m->topsize > m->trim_check)
-      m->trim_check = MAX_SIZE_T;
-  }
-
-  return (released != 0)? 1 : 0;
-}
-
-
-/* ---------------------------- malloc support --------------------------- */
-
-/* allocate a large request from the best fitting chunk in a treebin */
-static void* tmalloc_large(mstate m, size_t nb) {
-  tchunkptr v = 0;
-  size_t rsize = -nb; /* Unsigned negation */
-  tchunkptr t;
-  bindex_t idx;
-  compute_tree_index(nb, idx);
-  if ((t = *treebin_at(m, idx)) != 0) {
-    /* Traverse tree for this bin looking for node with size == nb */
-    size_t sizebits = nb << leftshift_for_tree_index(idx);
-    tchunkptr rst = 0;  /* The deepest untaken right subtree */
-    for (;;) {
-      tchunkptr rt;
-      size_t trem = chunksize(t) - nb;
-      if (trem < rsize) {
-        v = t;
-        if ((rsize = trem) == 0)
-          break;
-      }
-      rt = t->child[1];
-      t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
-      if (rt != 0 && rt != t)
-        rst = rt;
-      if (t == 0) {
-        t = rst; /* set t to least subtree holding sizes > nb */
-        break;
-      }
-      sizebits <<= 1;
-    }
-  }
-  if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
-    binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
-    if (leftbits != 0) {
-      bindex_t i;
-      binmap_t leastbit = least_bit(leftbits);
-      compute_bit2idx(leastbit, i);
-      t = *treebin_at(m, i);
-    }
-  }
-
-  while (t != 0) { /* find smallest of tree or subtree */
-    size_t trem = chunksize(t) - nb;
-    if (trem < rsize) {
-      rsize = trem;
-      v = t;
-    }
-    t = leftmost_child(t);
-  }
-
-  /*  If dv is a better fit, return 0 so malloc will use it */
-  if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
-    if (RTCHECK(ok_address(m, v))) { /* split */
-      mchunkptr r = chunk_plus_offset(v, nb);
-      assert(chunksize(v) == rsize + nb);
-      if (RTCHECK(ok_next(v, r))) {
-        unlink_large_chunk(m, v);
-        if (rsize < MIN_CHUNK_SIZE)
-          set_inuse_and_pinuse(m, v, (rsize + nb));
-        else {
-          set_size_and_pinuse_of_inuse_chunk(m, v, nb);
-          set_size_and_pinuse_of_free_chunk(r, rsize);
-          insert_chunk(m, r, rsize);
-        }
-        return chunk2mem(v);
-      }
-    }
-    CORRUPTION_ERROR_ACTION(m);
-  }
-  return 0;
-}
-
-/* allocate a small request from the best fitting chunk in a treebin */
-static void* tmalloc_small(mstate m, size_t nb) {
-  tchunkptr t, v;
-  size_t rsize;
-  bindex_t i;
-  binmap_t leastbit = least_bit(m->treemap);
-  compute_bit2idx(leastbit, i);
-  v = t = *treebin_at(m, i);
-  rsize = chunksize(t) - nb;
-
-  while ((t = leftmost_child(t)) != 0) {
-    size_t trem = chunksize(t) - nb;
-    if (trem < rsize) {
-      rsize = trem;
-      v = t;
-    }
-  }
-
-  if (RTCHECK(ok_address(m, v))) {
-    mchunkptr r = chunk_plus_offset(v, nb);
-    assert(chunksize(v) == rsize + nb);
-    if (RTCHECK(ok_next(v, r))) {
-      unlink_large_chunk(m, v);
-      if (rsize < MIN_CHUNK_SIZE)
-        set_inuse_and_pinuse(m, v, (rsize + nb));
-      else {
-        set_size_and_pinuse_of_inuse_chunk(m, v, nb);
-        set_size_and_pinuse_of_free_chunk(r, rsize);
-        replace_dv(m, r, rsize);
-      }
-      return chunk2mem(v);
-    }
-  }
-
-  CORRUPTION_ERROR_ACTION(m);
-  return 0;
-}
-
-/* --------------------------- realloc support --------------------------- */
-
-static void* internal_realloc(mstate m, void* oldmem, size_t bytes) {
-  if (bytes >= MAX_REQUEST) {
-    MALLOC_FAILURE_ACTION;
-    return 0;
-  }
-  if (!PREACTION(m)) {
-    mchunkptr oldp = mem2chunk(oldmem);
-    size_t oldsize = chunksize(oldp);
-    mchunkptr next = chunk_plus_offset(oldp, oldsize);
-    mchunkptr newp = 0;
-    void* extra = 0;
-
-    /* Try to either shrink or extend into top. Else malloc-copy-free */
-
-    if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) &&
-                ok_next(oldp, next) && ok_pinuse(next))) {
-      size_t nb = request2size(bytes);
-      if (is_mmapped(oldp))
-        newp = mmap_resize(m, oldp, nb);
-      else if (oldsize >= nb) { /* already big enough */
-        size_t rsize = oldsize - nb;
-        newp = oldp;
-        if (rsize >= MIN_CHUNK_SIZE) {
-          mchunkptr remainder = chunk_plus_offset(newp, nb);
-          set_inuse(m, newp, nb);
-          set_inuse_and_pinuse(m, remainder, rsize);
-          extra = chunk2mem(remainder);
-        }
-      }
-      else if (next == m->top && oldsize + m->topsize > nb) {
-        /* Expand into top */
-        size_t newsize = oldsize + m->topsize;
-        size_t newtopsize = newsize - nb;
-        mchunkptr newtop = chunk_plus_offset(oldp, nb);
-        set_inuse(m, oldp, nb);
-        newtop->head = newtopsize |PINUSE_BIT;
-        m->top = newtop;
-        m->topsize = newtopsize;
-        newp = oldp;
-      }
-    }
-    else {
-      USAGE_ERROR_ACTION(m, oldmem);
-      POSTACTION(m);
-      return 0;
-    }
-#if DEBUG
-    if (newp != 0) {
-      check_inuse_chunk(m, newp); /* Check requires lock */
-    }
-#endif
-
-    POSTACTION(m);
-
-    if (newp != 0) {
-      if (extra != 0) {
-        internal_free(m, extra);
-      }
-      return chunk2mem(newp);
-    }
-    else {
-      void* newmem = internal_malloc(m, bytes);
-      if (newmem != 0) {
-        size_t oc = oldsize - overhead_for(oldp);
-        memcpy(newmem, oldmem, (oc < bytes)? oc : bytes);
-        internal_free(m, oldmem);
-      }
-      return newmem;
-    }
-  }
-  return 0;
-}
-
-/* --------------------------- memalign support -------------------------- */
-
-static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
-  if (alignment <= MALLOC_ALIGNMENT)    /* Can just use malloc */
-    return internal_malloc(m, bytes);
-  if (alignment <  MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
-    alignment = MIN_CHUNK_SIZE;
-  if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
-    size_t a = MALLOC_ALIGNMENT << 1;
-    while (a < alignment) a <<= 1;
-    alignment = a;
-  }
-
-  if (bytes >= MAX_REQUEST - alignment) {
-    if (m != 0)  { /* Test isn't needed but avoids compiler warning */
-      MALLOC_FAILURE_ACTION;
-    }
-  }
-  else {
-    size_t nb = request2size(bytes);
-    size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
-    char* mem = (char*)internal_malloc(m, req);
-    if (mem != 0) {
-      void* leader = 0;
-      void* trailer = 0;
-      mchunkptr p = mem2chunk(mem);
-
-      if (PREACTION(m)) return 0;
-      if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */
-        /*
-          Find an aligned spot inside chunk.  Since we need to give
-          back leading space in a chunk of at least MIN_CHUNK_SIZE, if
-          the first calculation places us at a spot with less than
-          MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
-          We've allocated enough total room so that this is always
-          possible.
-        */
-        char* br = (char*)mem2chunk((size_t)(((size_t)(mem +
-                                                       alignment -
-                                                       SIZE_T_ONE)) &
-                                             -alignment));
-        char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
-          br : br+alignment;
-        mchunkptr newp = (mchunkptr)pos;
-        size_t leadsize = pos - (char*)(p);
-        size_t newsize = chunksize(p) - leadsize;
-
-        if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
-          newp->prev_foot = p->prev_foot + leadsize;
-          newp->head = newsize;
-        }
-        else { /* Otherwise, give back leader, use the rest */
-          set_inuse(m, newp, newsize);
-          set_inuse(m, p, leadsize);
-          leader = chunk2mem(p);
-        }
-        p = newp;
-      }
-
-      /* Give back spare room at the end */
-      if (!is_mmapped(p)) {
-        size_t size = chunksize(p);
-        if (size > nb + MIN_CHUNK_SIZE) {
-          size_t remainder_size = size - nb;
-          mchunkptr remainder = chunk_plus_offset(p, nb);
-          set_inuse(m, p, nb);
-          set_inuse(m, remainder, remainder_size);
-          trailer = chunk2mem(remainder);
-        }
-      }
-
-      assert (chunksize(p) >= nb);
-      assert((((size_t)(chunk2mem(p))) % alignment) == 0);
-      check_inuse_chunk(m, p);
-      POSTACTION(m);
-      if (leader != 0) {
-        internal_free(m, leader);
-      }
-      if (trailer != 0) {
-        internal_free(m, trailer);
-      }
-      return chunk2mem(p);
-    }
-  }
-  return 0;
-}
-
-/* ------------------------ comalloc/coalloc support --------------------- */
-
-static void** ialloc(mstate m,
-                     size_t n_elements,
-                     size_t* sizes,
-                     int opts,
-                     void* chunks[]) {
-  /*
-    This provides common support for independent_X routines, handling
-    all of the combinations that can result.
-
-    The opts arg has:
-    bit 0 set if all elements are same size (using sizes[0])
-    bit 1 set if elements should be zeroed
-  */
-
-  size_t    element_size;   /* chunksize of each element, if all same */
-  size_t    contents_size;  /* total size of elements */
-  size_t    array_size;     /* request size of pointer array */
-  void*     mem;            /* malloced aggregate space */
-  mchunkptr p;              /* corresponding chunk */
-  size_t    remainder_size; /* remaining bytes while splitting */
-  void**    marray;         /* either "chunks" or malloced ptr array */
-  mchunkptr array_chunk;    /* chunk for malloced ptr array */
-  flag_t    was_enabled;    /* to disable mmap */
-  size_t    size;
-  size_t    i;
-
-  ensure_initialization();
-  /* compute array length, if needed */
-  if (chunks != 0) {
-    if (n_elements == 0)
-      return chunks; /* nothing to do */
-    marray = chunks;
-    array_size = 0;
-  }
-  else {
-    /* if empty req, must still return chunk representing empty array */
-    if (n_elements == 0)
-      return (void**)internal_malloc(m, 0);
-    marray = 0;
-    array_size = request2size(n_elements * (sizeof(void*)));
-  }
-
-  /* compute total element size */
-  if (opts & 0x1) { /* all-same-size */
-    element_size = request2size(*sizes);
-    contents_size = n_elements * element_size;
-  }
-  else { /* add up all the sizes */
-    element_size = 0;
-    contents_size = 0;
-    for (i = 0; i != n_elements; ++i)
-      contents_size += request2size(sizes[i]);
-  }
-
-  size = contents_size + array_size;
-
-  /*
-     Allocate the aggregate chunk.  First disable direct-mmapping so
-     malloc won't use it, since we would not be able to later
-     free/realloc space internal to a segregated mmap region.
-  */
-  was_enabled = use_mmap(m);
-  disable_mmap(m);
-  mem = internal_malloc(m, size - CHUNK_OVERHEAD);
-  if (was_enabled)
-    enable_mmap(m);
-  if (mem == 0)
-    return 0;
-
-  if (PREACTION(m)) return 0;
-  p = mem2chunk(mem);
-  remainder_size = chunksize(p);
-
-  assert(!is_mmapped(p));
-
-  if (opts & 0x2) {       /* optionally clear the elements */
-    memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
-  }
-
-  /* If not provided, allocate the pointer array as final part of chunk */
-  if (marray == 0) {
-    size_t  array_chunk_size;
-    array_chunk = chunk_plus_offset(p, contents_size);
-    array_chunk_size = remainder_size - contents_size;
-    marray = (void**) (chunk2mem(array_chunk));
-    set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
-    remainder_size = contents_size;
-  }
-
-  /* split out elements */
-  for (i = 0; ; ++i) {
-    marray[i] = chunk2mem(p);
-    if (i != n_elements-1) {
-      if (element_size != 0)
-        size = element_size;
-      else
-        size = request2size(sizes[i]);
-      remainder_size -= size;
-      set_size_and_pinuse_of_inuse_chunk(m, p, size);
-      p = chunk_plus_offset(p, size);
-    }
-    else { /* the final element absorbs any overallocation slop */
-      set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
-      break;
-    }
-  }
-
-#if DEBUG
-  if (marray != chunks) {
-    /* final element must have exactly exhausted chunk */
-    if (element_size != 0) {
-      assert(remainder_size == element_size);
-    }
-    else {
-      assert(remainder_size == request2size(sizes[i]));
-    }
-    check_inuse_chunk(m, mem2chunk(marray));
-  }
-  for (i = 0; i != n_elements; ++i)
-    check_inuse_chunk(m, mem2chunk(marray[i]));
-
-#endif /* DEBUG */
-
-  POSTACTION(m);
-  return marray;
-}
-
-
-/* -------------------------- public routines ---------------------------- */
-
-#if !ONLY_MSPACES
-
-void* dlmalloc(size_t bytes) {
-  /*
-     Basic algorithm:
-     If a small request (< 256 bytes minus per-chunk overhead):
-       1. If one exists, use a remainderless chunk in associated smallbin.
-          (Remainderless means that there are too few excess bytes to
-          represent as a chunk.)
-       2. If it is big enough, use the dv chunk, which is normally the
-          chunk adjacent to the one used for the most recent small request.
-       3. If one exists, split the smallest available chunk in a bin,
-          saving remainder in dv.
-       4. If it is big enough, use the top chunk.
-       5. If available, get memory from system and use it
-     Otherwise, for a large request:
-       1. Find the smallest available binned chunk that fits, and use it
-          if it is better fitting than dv chunk, splitting if necessary.
-       2. If better fitting than any binned chunk, use the dv chunk.
-       3. If it is big enough, use the top chunk.
-       4. If request size >= mmap threshold, try to directly mmap this chunk.
-       5. If available, get memory from system and use it
-
-     The ugly goto's here ensure that postaction occurs along all paths.
-  */
-
-#if USE_LOCKS
-  ensure_initialization(); /* initialize in sys_alloc if not using locks */
-#endif
-
-  if (!PREACTION(gm)) {
-    void* mem;
-    size_t nb;
-    if (bytes <= MAX_SMALL_REQUEST) {
-      bindex_t idx;
-      binmap_t smallbits;
-      nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
-      idx = small_index(nb);
-      smallbits = gm->smallmap >> idx;
-
-      if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
-        mchunkptr b, p;
-        idx += ~smallbits & 1;       /* Uses next bin if idx empty */
-        b = smallbin_at(gm, idx);
-        p = b->fd;
-        assert(chunksize(p) == small_index2size(idx));
-        unlink_first_small_chunk(gm, b, p, idx);
-        set_inuse_and_pinuse(gm, p, small_index2size(idx));
-        mem = chunk2mem(p);
-        check_malloced_chunk(gm, mem, nb);
-        goto postaction;
-      }
-
-      else if (nb > gm->dvsize) {
-        if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
-          mchunkptr b, p, r;
-          size_t rsize;
-          bindex_t i;
-          binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
-          binmap_t leastbit = least_bit(leftbits);
-          compute_bit2idx(leastbit, i);
-          b = smallbin_at(gm, i);
-          p = b->fd;
-          assert(chunksize(p) == small_index2size(i));
-          unlink_first_small_chunk(gm, b, p, i);
-          rsize = small_index2size(i) - nb;
-          /* Fit here cannot be remainderless if 4byte sizes */
-          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
-            set_inuse_and_pinuse(gm, p, small_index2size(i));
-          else {
-            set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
-            r = chunk_plus_offset(p, nb);
-            set_size_and_pinuse_of_free_chunk(r, rsize);
-            replace_dv(gm, r, rsize);
-          }
-          mem = chunk2mem(p);
-          check_malloced_chunk(gm, mem, nb);
-          goto postaction;
-        }
-
-        else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
-          check_malloced_chunk(gm, mem, nb);
-          goto postaction;
-        }
-      }
-    }
-    else if (bytes >= MAX_REQUEST)
-      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
-    else {
-      nb = pad_request(bytes);
-      if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
-        check_malloced_chunk(gm, mem, nb);
-        goto postaction;
-      }
-    }
-
-    if (nb <= gm->dvsize) {
-      size_t rsize = gm->dvsize - nb;
-      mchunkptr p = gm->dv;
-      if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
-        mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
-        gm->dvsize = rsize;
-        set_size_and_pinuse_of_free_chunk(r, rsize);
-        set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
-      }
-      else { /* exhaust dv */
-        size_t dvs = gm->dvsize;
-        gm->dvsize = 0;
-        gm->dv = 0;
-        set_inuse_and_pinuse(gm, p, dvs);
-      }
-      mem = chunk2mem(p);
-      check_malloced_chunk(gm, mem, nb);
-      goto postaction;
-    }
-
-    else if (nb < gm->topsize) { /* Split top */
-      size_t rsize = gm->topsize -= nb;
-      mchunkptr p = gm->top;
-      mchunkptr r = gm->top = chunk_plus_offset(p, nb);
-      r->head = rsize | PINUSE_BIT;
-      set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
-      mem = chunk2mem(p);
-      check_top_chunk(gm, gm->top);
-      check_malloced_chunk(gm, mem, nb);
-      goto postaction;
-    }
-
-    mem = sys_alloc(gm, nb);
-
-  postaction:
-    POSTACTION(gm);
-    return mem;
-  }
-
-  return 0;
-}
-
-void dlfree(void* mem) {
-  /*
-     Consolidate freed chunks with preceeding or succeeding bordering
-     free chunks, if they exist, and then place in a bin.  Intermixed
-     with special cases for top, dv, mmapped chunks, and usage errors.
-  */
-
-  if (mem != 0) {
-    mchunkptr p  = mem2chunk(mem);
-#if FOOTERS
-    mstate fm = get_mstate_for(p);
-    if (!ok_magic(fm)) {
-      USAGE_ERROR_ACTION(fm, p);
-      return;
-    }
-#else /* FOOTERS */
-#define fm gm
-#endif /* FOOTERS */
-    if (!PREACTION(fm)) {
-      check_inuse_chunk(fm, p);
-      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
-        size_t psize = chunksize(p);
-        mchunkptr next = chunk_plus_offset(p, psize);
-        if (!pinuse(p)) {
-          size_t prevsize = p->prev_foot;
-          if (is_mmapped(p)) {
-            psize += prevsize + MMAP_FOOT_PAD;
-            if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
-              fm->footprint -= psize;
-            goto postaction;
-          }
-          else {
-            mchunkptr prev = chunk_minus_offset(p, prevsize);
-            psize += prevsize;
-            p = prev;
-            if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
-              if (p != fm->dv) {
-                unlink_chunk(fm, p, prevsize);
-              }
-              else if ((next->head & INUSE_BITS) == INUSE_BITS) {
-                fm->dvsize = psize;
-                set_free_with_pinuse(p, psize, next);
-                goto postaction;
-              }
-            }
-            else
-              goto erroraction;
-          }
-        }
-
-        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
-          if (!cinuse(next)) {  /* consolidate forward */
-            if (next == fm->top) {
-              size_t tsize = fm->topsize += psize;
-              fm->top = p;
-              p->head = tsize | PINUSE_BIT;
-              if (p == fm->dv) {
-                fm->dv = 0;
-                fm->dvsize = 0;
-              }
-              if (should_trim(fm, tsize))
-                sys_trim(fm, 0);
-              goto postaction;
-            }
-            else if (next == fm->dv) {
-              size_t dsize = fm->dvsize += psize;
-              fm->dv = p;
-              set_size_and_pinuse_of_free_chunk(p, dsize);
-              goto postaction;
-            }
-            else {
-              size_t nsize = chunksize(next);
-              psize += nsize;
-              unlink_chunk(fm, next, nsize);
-              set_size_and_pinuse_of_free_chunk(p, psize);
-              if (p == fm->dv) {
-                fm->dvsize = psize;
-                goto postaction;
-              }
-            }
-          }
-          else
-            set_free_with_pinuse(p, psize, next);
-
-          if (is_small(psize)) {
-            insert_small_chunk(fm, p, psize);
-            check_free_chunk(fm, p);
-          }
-          else {
-            tchunkptr tp = (tchunkptr)p;
-            insert_large_chunk(fm, tp, psize);
-            check_free_chunk(fm, p);
-            if (--fm->release_checks == 0)
-              release_unused_segments(fm);
-          }
-          goto postaction;
-        }
-      }
-    erroraction:
-      USAGE_ERROR_ACTION(fm, p);
-    postaction:
-      POSTACTION(fm);
-    }
-  }
-#if !FOOTERS
-#undef fm
-#endif /* FOOTERS */
-}
-
-void* dlcalloc(size_t n_elements, size_t elem_size) {
-  void* mem;
-  size_t req = 0;
-  if (n_elements != 0) {
-    req = n_elements * elem_size;
-    if (((n_elements | elem_size) & ~(size_t)0xffff) &&
-        (req / n_elements != elem_size))
-      req = MAX_SIZE_T; /* force downstream failure on overflow */
-  }
-  mem = dlmalloc(req);
-  if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
-    memset(mem, 0, req);
-  return mem;
-}
-
-void* dlrealloc(void* oldmem, size_t bytes) {
-  if (oldmem == 0)
-    return dlmalloc(bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
-  if (bytes == 0) {
-    dlfree(oldmem);
-    return 0;
-  }
-#endif /* REALLOC_ZERO_BYTES_FREES */
-  else {
-#if ! FOOTERS
-    mstate m = gm;
-#else /* FOOTERS */
-    mstate m = get_mstate_for(mem2chunk(oldmem));
-    if (!ok_magic(m)) {
-      USAGE_ERROR_ACTION(m, oldmem);
-      return 0;
-    }
-#endif /* FOOTERS */
-    return internal_realloc(m, oldmem, bytes);
-  }
-}
-
-void* dlmemalign(size_t alignment, size_t bytes) {
-  return internal_memalign(gm, alignment, bytes);
-}
-
-void** dlindependent_calloc(size_t n_elements, size_t elem_size,
-                                 void* chunks[]) {
-  size_t sz = elem_size; /* serves as 1-element array */
-  return ialloc(gm, n_elements, &sz, 3, chunks);
-}
-
-void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
-                                   void* chunks[]) {
-  return ialloc(gm, n_elements, sizes, 0, chunks);
-}
-
-void* dlvalloc(size_t bytes) {
-  size_t pagesz;
-  ensure_initialization();
-  pagesz = mparams.page_size;
-  return dlmemalign(pagesz, bytes);
-}
-
-void* dlpvalloc(size_t bytes) {
-  size_t pagesz;
-  ensure_initialization();
-  pagesz = mparams.page_size;
-  return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE));
-}
-
-int dlmalloc_trim(size_t pad) {
-  int result = 0;
-  ensure_initialization();
-  if (!PREACTION(gm)) {
-    result = sys_trim(gm, pad);
-    POSTACTION(gm);
-  }
-  return result;
-}
-
-size_t dlmalloc_footprint(void) {
-  return gm->footprint;
-}
-
-size_t dlmalloc_max_footprint(void) {
-  return gm->max_footprint;
-}
-
-#if !NO_MALLINFO
-struct mallinfo dlmallinfo(void) {
-  return internal_mallinfo(gm);
-}
-#endif /* NO_MALLINFO */
-
-void dlmalloc_stats() {
-  internal_malloc_stats(gm);
-}
-
-int dlmallopt(int param_number, int value) {
-  return change_mparam(param_number, value);
-}
-
-#endif /* !ONLY_MSPACES */
-
-size_t dlmalloc_usable_size(void* mem) {
-  if (mem != 0) {
-    mchunkptr p = mem2chunk(mem);
-    if (is_inuse(p))
-      return chunksize(p) - overhead_for(p);
-  }
-  return 0;
-}
-
-/* ----------------------------- user mspaces ---------------------------- */
-
-#if MSPACES
-
-static mstate init_user_mstate(char* tbase, size_t tsize) {
-  size_t msize = pad_request(sizeof(struct malloc_state));
-  mchunkptr mn;
-  mchunkptr msp = align_as_chunk(tbase);
-  mstate m = (mstate)(chunk2mem(msp));
-  memset(m, 0, msize);
-  INITIAL_LOCK(&m->mutex);
-  msp->head = (msize|INUSE_BITS);
-  m->seg.base = m->least_addr = tbase;
-  m->seg.size = m->footprint = m->max_footprint = tsize;
-  m->magic = mparams.magic;
-  m->release_checks = MAX_RELEASE_CHECK_RATE;
-  m->mflags = mparams.default_mflags;
-  m->extp = 0;
-  m->exts = 0;
-  disable_contiguous(m);
-  init_bins(m);
-  mn = next_chunk(mem2chunk(m));
-  init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
-  check_top_chunk(m, m->top);
-  return m;
-}
-
-mspace create_mspace(size_t capacity, int locked) {
-  mstate m = 0;
-  size_t msize;
-  ensure_initialization();
-  msize = pad_request(sizeof(struct malloc_state));
-  if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
-    size_t rs = ((capacity == 0)? mparams.granularity :
-                 (capacity + TOP_FOOT_SIZE + msize));
-    size_t tsize = granularity_align(rs);
-    char* tbase = (char*)(CALL_MMAP(tsize));
-    if (tbase != CMFAIL) {
-      m = init_user_mstate(tbase, tsize);
-      m->seg.sflags = USE_MMAP_BIT;
-      set_lock(m, locked);
-    }
-  }
-  return (mspace)m;
-}
-
-mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
-  mstate m = 0;
-  size_t msize;
-  ensure_initialization();
-  msize = pad_request(sizeof(struct malloc_state));
-  if (capacity > msize + TOP_FOOT_SIZE &&
-      capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
-    m = init_user_mstate((char*)base, capacity);
-    m->seg.sflags = EXTERN_BIT;
-    set_lock(m, locked);
-  }
-  return (mspace)m;
-}
-
-int mspace_track_large_chunks(mspace msp, int enable) {
-  int ret = 0;
-  mstate ms = (mstate)msp;
-  if (!PREACTION(ms)) {
-    if (!use_mmap(ms))
-      ret = 1;
-    if (!enable)
-      enable_mmap(ms);
-    else
-      disable_mmap(ms);
-    POSTACTION(ms);
-  }
-  return ret;
-}
-
-size_t destroy_mspace(mspace msp) {
-  size_t freed = 0;
-  mstate ms = (mstate)msp;
-  if (ok_magic(ms)) {
-    msegmentptr sp = &ms->seg;
-    while (sp != 0) {
-      char* base = sp->base;
-      size_t size = sp->size;
-      flag_t flag = sp->sflags;
-      sp = sp->next;
-      if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) &&
-          CALL_MUNMAP(base, size) == 0)
-        freed += size;
-    }
-  }
-  else {
-    USAGE_ERROR_ACTION(ms,ms);
-  }
-  return freed;
-}
-
-/*
-  mspace versions of routines are near-clones of the global
-  versions. This is not so nice but better than the alternatives.
-*/
-
-
-void* mspace_malloc(mspace msp, size_t bytes) {
-  mstate ms = (mstate)msp;
-  if (!ok_magic(ms)) {
-    USAGE_ERROR_ACTION(ms,ms);
-    return 0;
-  }
-  if (!PREACTION(ms)) {
-    void* mem;
-    size_t nb;
-    if (bytes <= MAX_SMALL_REQUEST) {
-      bindex_t idx;
-      binmap_t smallbits;
-      nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
-      idx = small_index(nb);
-      smallbits = ms->smallmap >> idx;
-
-      if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
-        mchunkptr b, p;
-        idx += ~smallbits & 1;       /* Uses next bin if idx empty */
-        b = smallbin_at(ms, idx);
-        p = b->fd;
-        assert(chunksize(p) == small_index2size(idx));
-        unlink_first_small_chunk(ms, b, p, idx);
-        set_inuse_and_pinuse(ms, p, small_index2size(idx));
-        mem = chunk2mem(p);
-        check_malloced_chunk(ms, mem, nb);
-        goto postaction;
-      }
-
-      else if (nb > ms->dvsize) {
-        if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
-          mchunkptr b, p, r;
-          size_t rsize;
-          bindex_t i;
-          binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
-          binmap_t leastbit = least_bit(leftbits);
-          compute_bit2idx(leastbit, i);
-          b = smallbin_at(ms, i);
-          p = b->fd;
-          assert(chunksize(p) == small_index2size(i));
-          unlink_first_small_chunk(ms, b, p, i);
-          rsize = small_index2size(i) - nb;
-          /* Fit here cannot be remainderless if 4byte sizes */
-          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
-            set_inuse_and_pinuse(ms, p, small_index2size(i));
-          else {
-            set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
-            r = chunk_plus_offset(p, nb);
-            set_size_and_pinuse_of_free_chunk(r, rsize);
-            replace_dv(ms, r, rsize);
-          }
-          mem = chunk2mem(p);
-          check_malloced_chunk(ms, mem, nb);
-          goto postaction;
-        }
-
-        else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
-          check_malloced_chunk(ms, mem, nb);
-          goto postaction;
-        }
-      }
-    }
-    else if (bytes >= MAX_REQUEST)
-      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
-    else {
-      nb = pad_request(bytes);
-      if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
-        check_malloced_chunk(ms, mem, nb);
-        goto postaction;
-      }
-    }
-
-    if (nb <= ms->dvsize) {
-      size_t rsize = ms->dvsize - nb;
-      mchunkptr p = ms->dv;
-      if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
-        mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
-        ms->dvsize = rsize;
-        set_size_and_pinuse_of_free_chunk(r, rsize);
-        set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
-      }
-      else { /* exhaust dv */
-        size_t dvs = ms->dvsize;
-        ms->dvsize = 0;
-        ms->dv = 0;
-        set_inuse_and_pinuse(ms, p, dvs);
-      }
-      mem = chunk2mem(p);
-      check_malloced_chunk(ms, mem, nb);
-      goto postaction;
-    }
-
-    else if (nb < ms->topsize) { /* Split top */
-      size_t rsize = ms->topsize -= nb;
-      mchunkptr p = ms->top;
-      mchunkptr r = ms->top = chunk_plus_offset(p, nb);
-      r->head = rsize | PINUSE_BIT;
-      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
-      mem = chunk2mem(p);
-      check_top_chunk(ms, ms->top);
-      check_malloced_chunk(ms, mem, nb);
-      goto postaction;
-    }
-
-    mem = sys_alloc(ms, nb);
-
-  postaction:
-    POSTACTION(ms);
-    return mem;
-  }
-
-  return 0;
-}
-
-void mspace_free(mspace msp, void* mem) {
-  if (mem != 0) {
-    mchunkptr p  = mem2chunk(mem);
-#if FOOTERS
-    mstate fm = get_mstate_for(p);
-    msp = msp; /* placate people compiling -Wunused */
-#else /* FOOTERS */
-    mstate fm = (mstate)msp;
-#endif /* FOOTERS */
-    if (!ok_magic(fm)) {
-      USAGE_ERROR_ACTION(fm, p);
-      return;
-    }
-    if (!PREACTION(fm)) {
-      check_inuse_chunk(fm, p);
-      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
-        size_t psize = chunksize(p);
-        mchunkptr next = chunk_plus_offset(p, psize);
-        if (!pinuse(p)) {
-          size_t prevsize = p->prev_foot;
-          if (is_mmapped(p)) {
-            psize += prevsize + MMAP_FOOT_PAD;
-            if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
-              fm->footprint -= psize;
-            goto postaction;
-          }
-          else {
-            mchunkptr prev = chunk_minus_offset(p, prevsize);
-            psize += prevsize;
-            p = prev;
-            if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
-              if (p != fm->dv) {
-                unlink_chunk(fm, p, prevsize);
-              }
-              else if ((next->head & INUSE_BITS) == INUSE_BITS) {
-                fm->dvsize = psize;
-                set_free_with_pinuse(p, psize, next);
-                goto postaction;
-              }
-            }
-            else
-              goto erroraction;
-          }
-        }
-
-        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
-          if (!cinuse(next)) {  /* consolidate forward */
-            if (next == fm->top) {
-              size_t tsize = fm->topsize += psize;
-              fm->top = p;
-              p->head = tsize | PINUSE_BIT;
-              if (p == fm->dv) {
-                fm->dv = 0;
-                fm->dvsize = 0;
-              }
-              if (should_trim(fm, tsize))
-                sys_trim(fm, 0);
-              goto postaction;
-            }
-            else if (next == fm->dv) {
-              size_t dsize = fm->dvsize += psize;
-              fm->dv = p;
-              set_size_and_pinuse_of_free_chunk(p, dsize);
-              goto postaction;
-            }
-            else {
-              size_t nsize = chunksize(next);
-              psize += nsize;
-              unlink_chunk(fm, next, nsize);
-              set_size_and_pinuse_of_free_chunk(p, psize);
-              if (p == fm->dv) {
-                fm->dvsize = psize;
-                goto postaction;
-              }
-            }
-          }
-          else
-            set_free_with_pinuse(p, psize, next);
-
-          if (is_small(psize)) {
-            insert_small_chunk(fm, p, psize);
-            check_free_chunk(fm, p);
-          }
-          else {
-            tchunkptr tp = (tchunkptr)p;
-            insert_large_chunk(fm, tp, psize);
-            check_free_chunk(fm, p);
-            if (--fm->release_checks == 0)
-              release_unused_segments(fm);
-          }
-          goto postaction;
-        }
-      }
-    erroraction:
-      USAGE_ERROR_ACTION(fm, p);
-    postaction:
-      POSTACTION(fm);
-    }
-  }
-}
-
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
-  void* mem;
-  size_t req = 0;
-  mstate ms = (mstate)msp;
-  if (!ok_magic(ms)) {
-    USAGE_ERROR_ACTION(ms,ms);
-    return 0;
-  }
-  if (n_elements != 0) {
-    req = n_elements * elem_size;
-    if (((n_elements | elem_size) & ~(size_t)0xffff) &&
-        (req / n_elements != elem_size))
-      req = MAX_SIZE_T; /* force downstream failure on overflow */
-  }
-  mem = internal_malloc(ms, req);
-  if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
-    memset(mem, 0, req);
-  return mem;
-}
-
-void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
-  if (oldmem == 0)
-    return mspace_malloc(msp, bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
-  if (bytes == 0) {
-    mspace_free(msp, oldmem);
-    return 0;
-  }
-#endif /* REALLOC_ZERO_BYTES_FREES */
-  else {
-#if FOOTERS
-    mchunkptr p  = mem2chunk(oldmem);
-    mstate ms = get_mstate_for(p);
-#else /* FOOTERS */
-    mstate ms = (mstate)msp;
-#endif /* FOOTERS */
-    if (!ok_magic(ms)) {
-      USAGE_ERROR_ACTION(ms,ms);
-      return 0;
-    }
-    return internal_realloc(ms, oldmem, bytes);
-  }
-}
-
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
-  mstate ms = (mstate)msp;
-  if (!ok_magic(ms)) {
-    USAGE_ERROR_ACTION(ms,ms);
-    return 0;
-  }
-  return internal_memalign(ms, alignment, bytes);
-}
-
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
-                                 size_t elem_size, void* chunks[]) {
-  size_t sz = elem_size; /* serves as 1-element array */
-  mstate ms = (mstate)msp;
-  if (!ok_magic(ms)) {
-    USAGE_ERROR_ACTION(ms,ms);
-    return 0;
-  }
-  return ialloc(ms, n_elements, &sz, 3, chunks);
-}
-
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
-                                   size_t sizes[], void* chunks[]) {
-  mstate ms = (mstate)msp;
-  if (!ok_magic(ms)) {
-    USAGE_ERROR_ACTION(ms,ms);
-    return 0;
-  }
-  return ialloc(ms, n_elements, sizes, 0, chunks);
-}
-
-int mspace_trim(mspace msp, size_t pad) {
-  int result = 0;
-  mstate ms = (mstate)msp;
-  if (ok_magic(ms)) {
-    if (!PREACTION(ms)) {
-      result = sys_trim(ms, pad);
-      POSTACTION(ms);
-    }
-  }
-  else {
-    USAGE_ERROR_ACTION(ms,ms);
-  }
-  return result;
-}
-
-void mspace_malloc_stats(mspace msp) {
-  mstate ms = (mstate)msp;
-  if (ok_magic(ms)) {
-    internal_malloc_stats(ms);
-  }
-  else {
-    USAGE_ERROR_ACTION(ms,ms);
-  }
-}
-
-size_t mspace_footprint(mspace msp) {
-  size_t result = 0;
-  mstate ms = (mstate)msp;
-  if (ok_magic(ms)) {
-    result = ms->footprint;
-  }
-  else {
-    USAGE_ERROR_ACTION(ms,ms);
-  }
-  return result;
-}
-
-
-size_t mspace_max_footprint(mspace msp) {
-  size_t result = 0;
-  mstate ms = (mstate)msp;
-  if (ok_magic(ms)) {
-    result = ms->max_footprint;
-  }
-  else {
-    USAGE_ERROR_ACTION(ms,ms);
-  }
-  return result;
-}
-
-
-#if !NO_MALLINFO
-struct mallinfo mspace_mallinfo(mspace msp) {
-  mstate ms = (mstate)msp;
-  if (!ok_magic(ms)) {
-    USAGE_ERROR_ACTION(ms,ms);
-  }
-  return internal_mallinfo(ms);
-}
-#endif /* NO_MALLINFO */
-
-size_t mspace_usable_size(void* mem) {
-  if (mem != 0) {
-    mchunkptr p = mem2chunk(mem);
-    if (is_inuse(p))
-      return chunksize(p) - overhead_for(p);
-  }
-  return 0;
-}
-
-int mspace_mallopt(int param_number, int value) {
-  return change_mparam(param_number, value);
-}
-
-#endif /* MSPACES */
-
-
-/* -------------------- Alternative MORECORE functions ------------------- */
-
-/*
-  Guidelines for creating a custom version of MORECORE:
-
-  * For best performance, MORECORE should allocate in multiples of pagesize.
-  * MORECORE may allocate more memory than requested. (Or even less,
-      but this will usually result in a malloc failure.)
-  * MORECORE must not allocate memory when given argument zero, but
-      instead return one past the end address of memory from previous
-      nonzero call.
-  * For best performance, consecutive calls to MORECORE with positive
-      arguments should return increasing addresses, indicating that
-      space has been contiguously extended.
-  * Even though consecutive calls to MORECORE need not return contiguous
-      addresses, it must be OK for malloc'ed chunks to span multiple
-      regions in those cases where they do happen to be contiguous.
-  * MORECORE need not handle negative arguments -- it may instead
-      just return MFAIL when given negative arguments.
-      Negative arguments are always multiples of pagesize. MORECORE
-      must not misinterpret negative args as large positive unsigned
-      args. You can suppress all such calls from even occurring by defining
-      MORECORE_CANNOT_TRIM,
-
-  As an example alternative MORECORE, here is a custom allocator
-  kindly contributed for pre-OSX macOS.  It uses virtually but not
-  necessarily physically contiguous non-paged memory (locked in,
-  present and won't get swapped out).  You can use it by uncommenting
-  this section, adding some #includes, and setting up the appropriate
-  defines above:
-
-      #define MORECORE osMoreCore
-
-  There is also a shutdown routine that should somehow be called for
-  cleanup upon program exit.
-
-  #define MAX_POOL_ENTRIES 100
-  #define MINIMUM_MORECORE_SIZE  (64 * 1024U)
-  static int next_os_pool;
-  void *our_os_pools[MAX_POOL_ENTRIES];
-
-  void *osMoreCore(int size)
-  {
-    void *ptr = 0;
-    static void *sbrk_top = 0;
-
-    if (size > 0)
-    {
-      if (size < MINIMUM_MORECORE_SIZE)
-         size = MINIMUM_MORECORE_SIZE;
-      if (CurrentExecutionLevel() == kTaskLevel)
-         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
-      if (ptr == 0)
-      {
-        return (void *) MFAIL;
-      }
-      // save ptrs so they can be freed during cleanup
-      our_os_pools[next_os_pool] = ptr;
-      next_os_pool++;
-      ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
-      sbrk_top = (char *) ptr + size;
-      return ptr;
-    }
-    else if (size < 0)
-    {
-      // we don't currently support shrink behavior
-      return (void *) MFAIL;
-    }
-    else
-    {
-      return sbrk_top;
-    }
-  }
-
-  // cleanup any allocated memory pools
-  // called as last thing before shutting down driver
-
-  void osCleanupMem(void)
-  {
-    void **ptr;
-
-    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
-      if (*ptr)
-      {
-         PoolDeallocate(*ptr);
-         *ptr = 0;
-      }
-  }
-
-*/
-
-
-/* -----------------------------------------------------------------------
-History:
-    V2.8.4 Wed May 27 09:56:23 2009  Doug Lea  (dl at gee)
-      * Use zeros instead of prev foot for is_mmapped
-      * Add mspace_track_large_chunks; thanks to Jean Brouwers
-      * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
-      * Fix insufficient sys_alloc padding when using 16byte alignment
-      * Fix bad error check in mspace_footprint
-      * Adaptations for ptmalloc; thanks to Wolfram Gloger.
-      * Reentrant spin locks; thanks to Earl Chew and others
-      * Win32 improvements; thanks to Niall Douglas and Earl Chew
-      * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
-      * Extension hook in malloc_state
-      * Various small adjustments to reduce warnings on some compilers
-      * Various configuration extensions/changes for more platforms. Thanks
-         to all who contributed these.
-
-    V2.8.3 Thu Sep 22 11:16:32 2005  Doug Lea  (dl at gee)
-      * Add max_footprint functions
-      * Ensure all appropriate literals are size_t
-      * Fix conditional compilation problem for some #define settings
-      * Avoid concatenating segments with the one provided
-        in create_mspace_with_base
-      * Rename some variables to avoid compiler shadowing warnings
-      * Use explicit lock initialization.
-      * Better handling of sbrk interference.
-      * Simplify and fix segment insertion, trimming and mspace_destroy
-      * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
-      * Thanks especially to Dennis Flanagan for help on these.
-
-    V2.8.2 Sun Jun 12 16:01:10 2005  Doug Lea  (dl at gee)
-      * Fix memalign brace error.
-
-    V2.8.1 Wed Jun  8 16:11:46 2005  Doug Lea  (dl at gee)
-      * Fix improper #endif nesting in C++
-      * Add explicit casts needed for C++
-
-    V2.8.0 Mon May 30 14:09:02 2005  Doug Lea  (dl at gee)
-      * Use trees for large bins
-      * Support mspaces
-      * Use segments to unify sbrk-based and mmap-based system allocation,
-        removing need for emulation on most platforms without sbrk.
-      * Default safety checks
-      * Optional footer checks. Thanks to William Robertson for the idea.
-      * Internal code refactoring
-      * Incorporate suggestions and platform-specific changes.
-        Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
-        Aaron Bachmann,  Emery Berger, and others.
-      * Speed up non-fastbin processing enough to remove fastbins.
-      * Remove useless cfree() to avoid conflicts with other apps.
-      * Remove internal memcpy, memset. Compilers handle builtins better.
-      * Remove some options that no one ever used and rename others.
-
-    V2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
-      * Fix malloc_state bitmap array misdeclaration
-
-    V2.7.1 Thu Jul 25 10:58:03 2002  Doug Lea  (dl at gee)
-      * Allow tuning of FIRST_SORTED_BIN_SIZE
-      * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
-      * Better detection and support for non-contiguousness of MORECORE.
-        Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
-      * Bypass most of malloc if no frees. Thanks To Emery Berger.
-      * Fix freeing of old top non-contiguous chunk im sysmalloc.
-      * Raised default trim and map thresholds to 256K.
-      * Fix mmap-related #defines. Thanks to Lubos Lunak.
-      * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
-      * Branch-free bin calculation
-      * Default trim and mmap thresholds now 256K.
-
-    V2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
-      * Introduce independent_comalloc and independent_calloc.
-        Thanks to Michael Pachos for motivation and help.
-      * Make optional .h file available
-      * Allow > 2GB requests on 32bit systems.
-      * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
-        Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
-        and Anonymous.
-      * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
-        helping test this.)
-      * memalign: check alignment arg
-      * realloc: don't try to shift chunks backwards, since this
-        leads to  more fragmentation in some programs and doesn't
-        seem to help in any others.
-      * Collect all cases in malloc requiring system memory into sysmalloc
-      * Use mmap as backup to sbrk
-      * Place all internal state in malloc_state
-      * Introduce fastbins (although similar to 2.5.1)
-      * Many minor tunings and cosmetic improvements
-      * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
-      * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
-        Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
-      * Include errno.h to support default failure action.
-
-    V2.6.6 Sun Dec  5 07:42:19 1999  Doug Lea  (dl at gee)
-      * return null for negative arguments
-      * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
-         * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
-          (e.g. WIN32 platforms)
-         * Cleanup header file inclusion for WIN32 platforms
-         * Cleanup code to avoid Microsoft Visual C++ compiler complaints
-         * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
-           memory allocation routines
-         * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
-         * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
-           usage of 'assert' in non-WIN32 code
-         * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
-           avoid infinite loop
-      * Always call 'fREe()' rather than 'free()'
-
-    V2.6.5 Wed Jun 17 15:57:31 1998  Doug Lea  (dl at gee)
-      * Fixed ordering problem with boundary-stamping
-
-    V2.6.3 Sun May 19 08:17:58 1996  Doug Lea  (dl at gee)
-      * Added pvalloc, as recommended by H.J. Liu
-      * Added 64bit pointer support mainly from Wolfram Gloger
-      * Added anonymously donated WIN32 sbrk emulation
-      * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
-      * malloc_extend_top: fix mask error that caused wastage after
-        foreign sbrks
-      * Add linux mremap support code from HJ Liu
-
-    V2.6.2 Tue Dec  5 06:52:55 1995  Doug Lea  (dl at gee)
-      * Integrated most documentation with the code.
-      * Add support for mmap, with help from
-        Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
-      * Use last_remainder in more cases.
-      * Pack bins using idea from  colin@nyx10.cs.du.edu
-      * Use ordered bins instead of best-fit threshhold
-      * Eliminate block-local decls to simplify tracing and debugging.
-      * Support another case of realloc via move into top
-      * Fix error occuring when initial sbrk_base not word-aligned.
-      * Rely on page size for units instead of SBRK_UNIT to
-        avoid surprises about sbrk alignment conventions.
-      * Add mallinfo, mallopt. Thanks to Raymond Nijssen
-        (raymond@es.ele.tue.nl) for the suggestion.
-      * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
-      * More precautions for cases where other routines call sbrk,
-        courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
-      * Added macros etc., allowing use in linux libc from
-        H.J. Lu (hjl@gnu.ai.mit.edu)
-      * Inverted this history list
-
-    V2.6.1 Sat Dec  2 14:10:57 1995  Doug Lea  (dl at gee)
-      * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
-      * Removed all preallocation code since under current scheme
-        the work required to undo bad preallocations exceeds
-        the work saved in good cases for most test programs.
-      * No longer use return list or unconsolidated bins since
-        no scheme using them consistently outperforms those that don't
-        given above changes.
-      * Use best fit for very large chunks to prevent some worst-cases.
-      * Added some support for debugging
-
-    V2.6.0 Sat Nov  4 07:05:23 1995  Doug Lea  (dl at gee)
-      * Removed footers when chunks are in use. Thanks to
-        Paul Wilson (wilson@cs.texas.edu) for the suggestion.
-
-    V2.5.4 Wed Nov  1 07:54:51 1995  Doug Lea  (dl at gee)
-      * Added malloc_trim, with help from Wolfram Gloger
-        (wmglo@Dent.MED.Uni-Muenchen.DE).
-
-    V2.5.3 Tue Apr 26 10:16:01 1994  Doug Lea  (dl at g)
-
-    V2.5.2 Tue Apr  5 16:20:40 1994  Doug Lea  (dl at g)
-      * realloc: try to expand in both directions
-      * malloc: swap order of clean-bin strategy;
-      * realloc: only conditionally expand backwards
-      * Try not to scavenge used bins
-      * Use bin counts as a guide to preallocation
-      * Occasionally bin return list chunks in first scan
-      * Add a few optimizations from colin@nyx10.cs.du.edu
-
-    V2.5.1 Sat Aug 14 15:40:43 1993  Doug Lea  (dl at g)
-      * faster bin computation & slightly different binning
-      * merged all consolidations to one part of malloc proper
-         (eliminating old malloc_find_space & malloc_clean_bin)
-      * Scan 2 returns chunks (not just 1)
-      * Propagate failure in realloc if malloc returns 0
-      * Add stuff to allow compilation on non-ANSI compilers
-          from kpv@research.att.com
-
-    V2.5 Sat Aug  7 07:41:59 1993  Doug Lea  (dl at g.oswego.edu)
-      * removed potential for odd address access in prev_chunk
-      * removed dependency on getpagesize.h
-      * misc cosmetics and a bit more internal documentation
-      * anticosmetics: mangled names in macros to evade debugger strangeness
-      * tested on sparc, hp-700, dec-mips, rs6000
-          with gcc & native cc (hp, dec only) allowing
-          Detlefs & Zorn comparison study (in SIGPLAN Notices.)
-
-    Trial version Fri Aug 28 13:14:29 1992  Doug Lea  (dl at g.oswego.edu)
-      * Based loosely on libg++-1.2X malloc. (It retains some of the overall
-         structure of old version,  but most details differ.)
-
-*/
-
-#endif
+#ifdef NEDMALLOC_ENABLED
+/*
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
+  Doug Lea and released to the public domain, as explained at
+  http://creativecommons.org/licenses/publicdomain.  Send questions,
+  comments, complaints, performance data, etc to dl@cs.oswego.edu
+
+* Version 2.8.4 Wed May 27 09:56:23 2009  Doug Lea  (dl at gee)
+
+   Note: There may be an updated version of this malloc obtainable at
+           ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+         Check before installing!
+
+* Quickstart
+
+  This library is all in one file to simplify the most common usage:
+  ftp it, compile it (-O3), and link it into another program. All of
+  the compile-time options default to reasonable values for use on
+  most platforms.  You might later want to step through various
+  compile-time and dynamic tuning options.
+
+  For convenience, an include file for code using this malloc is at:
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h
+  You don't really need this .h file unless you call functions not
+  defined in your system include files.  The .h file contains only the
+  excerpts from this file needed for using this malloc on ANSI C/C++
+  systems, so long as you haven't changed compile-time options about
+  naming and tuning parameters.  If you do, then you can create your
+  own malloc.h that does include all settings by cutting at the point
+  indicated below. Note that you may already by default be using a C
+  library containing a malloc that is based on some version of this
+  malloc (for example in linux). You might still want to use the one
+  in this file to customize settings or to avoid overheads associated
+  with library versions.
+
+* Vital statistics:
+
+  Supported pointer/size_t representation:       4 or 8 bytes
+       size_t MUST be an unsigned type of the same width as
+       pointers. (If you are using an ancient system that declares
+       size_t as a signed type, or need it to be a different width
+       than pointers, you can use a previous release of this malloc
+       (e.g. 2.7.2) supporting these.)
+
+  Alignment:                                     8 bytes (default)
+       This suffices for nearly all current machines and C compilers.
+       However, you can define MALLOC_ALIGNMENT to be wider than this
+       if necessary (up to 128bytes), at the expense of using more space.
+
+  Minimum overhead per allocated chunk:   4 or  8 bytes (if 4byte sizes)
+                                          8 or 16 bytes (if 8byte sizes)
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information, and additional cross-check word
+       if FOOTERS is defined.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including overhead)
+                          8-byte ptrs:  32 bytes    (including overhead)
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is about
+       32 bytes plus the remainder from a system page (the minimal
+       mmap unit); typically 4096 or 8192 bytes.
+
+  Security: static-safe; optionally more or less
+       The "security" of malloc refers to the ability of malicious
+       code to accentuate the effects of errors (for example, freeing
+       space that is not currently malloc'ed or overwriting past the
+       ends of chunks) in code that calls malloc.  This malloc
+       guarantees not to modify any memory locations below the base of
+       heap, i.e., static variables, even in the presence of usage
+       errors.  The routines additionally detect most improper frees
+       and reallocs.  All this holds as long as the static bookkeeping
+       for malloc itself is not corrupted by some other means.  This
+       is only one aspect of security -- these checks do not, and
+       cannot, detect all possible programming errors.
+
+       If FOOTERS is defined nonzero, then each allocated chunk
+       carries an additional check word to verify that it was malloced
+       from its space.  These check words are the same within each
+       execution of a program using malloc, but differ across
+       executions, so externally crafted fake chunks cannot be
+       freed. This improves security by rejecting frees/reallocs that
+       could corrupt heap memory, in addition to the checks preventing
+       writes to statics that are always on.  This may further improve
+       security at the expense of time and space overhead.  (Note that
+       FOOTERS may also be worth using with MSPACES.)
+
+       By default detected errors cause the program to abort (calling
+       "abort()"). You can override this to instead proceed past
+       errors by defining PROCEED_ON_ERROR.  In this case, a bad free
+       has no effect, and a malloc that encounters a bad address
+       caused by user overwrites will ignore the bad address by
+       dropping pointers and indices to all known memory. This may
+       be appropriate for programs that should continue if at all
+       possible in the face of programming errors, although they may
+       run out of memory because dropped memory is never reclaimed.
+
+       If you don't like either of these options, you can define
+       CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
+       else. And if if you are sure that your program using malloc has
+       no errors or vulnerabilities, you can define INSECURE to 1,
+       which might (or might not) provide a small performance improvement.
+
+  Thread-safety: NOT thread-safe unless USE_LOCKS defined
+       When USE_LOCKS is defined, each public call to malloc, free,
+       etc is surrounded with either a pthread mutex or a win32
+       spinlock (depending on WIN32). This is not especially fast, and
+       can be a major bottleneck.  It is designed only to provide
+       minimal protection in concurrent environments, and to provide a
+       basis for extensions.  If you are using malloc in a concurrent
+       program, consider instead using nedmalloc
+       (http://www.nedprod.com/programs/portable/nedmalloc/) or
+       ptmalloc (See http://www.malloc.de), which are derived
+       from versions of this malloc.
+
+  System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
+       This malloc can use unix sbrk or any emulation (invoked using
+       the CALL_MORECORE macro) and/or mmap/munmap or any emulation
+       (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
+       memory.  On most unix systems, it tends to work best if both
+       MORECORE and MMAP are enabled.  On Win32, it uses emulations
+       based on VirtualAlloc. It also uses common C library functions
+       like memset.
+
+  Compliance: I believe it is compliant with the Single Unix Specification
+       (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
+       others as well.
+
+* Overview of algorithms
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and
+  tunable.  Consistent balance across these factors results in a good
+  general-purpose allocator for malloc-intensive programs.
+
+  In most ways, this malloc is a best-fit allocator. Generally, it
+  chooses the best-fitting existing chunk for a request, with ties
+  broken in approximately least-recently-used order. (This strategy
+  normally maintains low fragmentation.) However, for requests less
+  than 256bytes, it deviates from best-fit when there is not an
+  exactly fitting available chunk by preferring to use space adjacent
+  to that used for the previous small request, as well as by breaking
+  ties in approximately most-recently-used order. (These enhance
+  locality of series of small allocations.)  And for very large requests
+  (>= 256Kb by default), it relies on system memory mapping
+  facilities, if supported.  (This helps avoid carrying around and
+  possibly fragmenting memory used only for large chunks.)
+
+  All operations (except malloc_stats and mallinfo) have execution
+  times that are bounded by a constant factor of the number of bits in
+  a size_t, not counting any clearing in calloc or copying in realloc,
+  or actions surrounding MORECORE and MMAP that have times
+  proportional to the number of non-contiguous regions returned by
+  system allocation routines, which is often just 1. In real-time
+  applications, you can optionally suppress segment traversals using
+  NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
+  system allocators return non-contiguous spaces, at the typical
+  expense of carrying around more memory and increased fragmentation.
+
+  The implementation is not very modular and seriously overuses
+  macros. Perhaps someday all C compilers will do as good a job
+  inlining modular code as can now be done by brute-force expansion,
+  but now, enough of them seem not to.
+
+  Some compilers issue a lot of warnings about code that is
+  dead/unreachable only on some platforms, and also about intentional
+  uses of negation on unsigned types. All known cases of each can be
+  ignored.
+
+  For a longer but out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+* MSPACES
+  If MSPACES is defined, then in addition to malloc, free, etc.,
+  this file also defines mspace_malloc, mspace_free, etc. These
+  are versions of malloc routines that take an "mspace" argument
+  obtained using create_mspace, to control all internal bookkeeping.
+  If ONLY_MSPACES is defined, only these versions are compiled.
+  So if you would like to use this allocator for only some allocations,
+  and your system malloc for others, you can compile with
+  ONLY_MSPACES and then do something like...
+    static mspace mymspace = create_mspace(0,0); // for example
+    #define mymalloc(bytes)  mspace_malloc(mymspace, bytes)
+
+  (Note: If you only need one instance of an mspace, you can instead
+  use "USE_DL_PREFIX" to relabel the global malloc.)
+
+  You can similarly create thread-local allocators by storing
+  mspaces as thread-locals. For example:
+    static __thread mspace tlms = 0;
+    void*  tlmalloc(size_t bytes) {
+      if (tlms == 0) tlms = create_mspace(0, 0);
+      return mspace_malloc(tlms, bytes);
+    }
+    void  tlfree(void* mem) { mspace_free(tlms, mem); }
+
+  Unless FOOTERS is defined, each mspace is completely independent.
+  You cannot allocate from one and free to another (although
+  conformance is only weakly checked, so usage errors are not always
+  caught). If FOOTERS is defined, then each chunk carries around a tag
+  indicating its originating mspace, and frees are directed to their
+  originating spaces.
+
+ -------------------------  Compile-time options ---------------------------
+
+Be careful in setting #define values for numerical constants of type
+size_t. On some systems, literal values are not automatically extended
+to size_t precision unless they are explicitly casted. You can also
+use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
+
+WIN32                    default: defined if _WIN32 defined
+  Defining WIN32 sets up defaults for MS environment and compilers.
+  Otherwise defaults are for unix. Beware that there seem to be some
+  cases where this malloc might not be a pure drop-in replacement for
+  Win32 malloc: Random-looking failures from Win32 GDI API's (eg;
+  SetDIBits()) may be due to bugs in some video driver implementations
+  when pixel buffers are malloc()ed, and the region spans more than
+  one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
+  default granularity, pixel buffers may straddle virtual allocation
+  regions more often than when using the Microsoft allocator.  You can
+  avoid this by using VirtualAlloc() and VirtualFree() for all pixel
+  buffers rather than using malloc().  If this is not possible,
+  recompile this malloc with a larger DEFAULT_GRANULARITY.
+
+MALLOC_ALIGNMENT         default: (size_t)8
+  Controls the minimum alignment for malloc'ed chunks.  It must be a
+  power of two and at least 8, even on machines for which smaller
+  alignments would suffice. It may be defined as larger than this
+  though. Note however that code and data structures are optimized for
+  the case of 8-byte alignment.
+
+MSPACES                  default: 0 (false)
+  If true, compile in support for independent allocation spaces.
+  This is only supported if HAVE_MMAP is true.
+
+ONLY_MSPACES             default: 0 (false)
+  If true, only compile in mspace versions, not regular versions.
+
+USE_LOCKS                default: 0 (false)
+  Causes each call to each public routine to be surrounded with
+  pthread or WIN32 mutex lock/unlock. (If set true, this can be
+  overridden on a per-mspace basis for mspace versions.) If set to a
+  non-zero value other than 1, locks are used, but their
+  implementation is left out, so lock functions must be supplied manually,
+  as described below.
+
+USE_SPIN_LOCKS           default: 1 iff USE_LOCKS and on x86 using gcc or MSC
+  If true, uses custom spin locks for locking. This is currently
+  supported only for x86 platforms using gcc or recent MS compilers.
+  Otherwise, posix locks or win32 critical sections are used.
+
+FOOTERS                  default: 0
+  If true, provide extra checking and dispatching by placing
+  information in the footers of allocated chunks. This adds
+  space and time overhead.
+
+INSECURE                 default: 0
+  If true, omit checks for usage errors and heap space overwrites.
+
+USE_DL_PREFIX            default: NOT defined
+  Causes compiler to prefix all public routines with the string 'dl'.
+  This can be useful when you only want to use this malloc in one part
+  of a program, using your regular system malloc elsewhere.
+
+ABORT                    default: defined as abort()
+  Defines how to abort on failed checks.  On most systems, a failed
+  check cannot die with an "assert" or even print an informative
+  message, because the underlying print routines in turn call malloc,
+  which will fail again.  Generally, the best policy is to simply call
+  abort(). It's not very useful to do more than this because many
+  errors due to overwriting will show up as address faults (null, odd
+  addresses etc) rather than malloc-triggered checks, so will also
+  abort.  Also, most compilers know that abort() does not return, so
+  can better optimize code conditionally calling it.
+
+PROCEED_ON_ERROR           default: defined as 0 (false)
+  Controls whether detected bad addresses cause them to bypassed
+  rather than aborting. If set, detected bad arguments to free and
+  realloc are ignored. And all bookkeeping information is zeroed out
+  upon a detected overwrite of freed heap space, thus losing the
+  ability to ever return it from malloc again, but enabling the
+  application to proceed. If PROCEED_ON_ERROR is defined, the
+  static variable malloc_corruption_error_count is compiled in
+  and can be examined to see if errors have occurred. This option
+  generates slower code than the default abort policy.
+
+DEBUG                    default: NOT defined
+  The DEBUG setting is mainly intended for people trying to modify
+  this code or diagnose problems when porting to new platforms.
+  However, it may also be able to better isolate user errors than just
+  using runtime checks.  The assertions in the check routines spell
+  out in more detail the assumptions and invariants underlying the
+  algorithms.  The checking is fairly extensive, and will slow down
+  execution noticeably. Calling malloc_stats or mallinfo with DEBUG
+  set will attempt to check every non-mmapped allocated and free chunk
+  in the course of computing the summaries.
+
+ABORT_ON_ASSERT_FAILURE   default: defined as 1 (true)
+  Debugging assertion failures can be nearly impossible if your
+  version of the assert macro causes malloc to be called, which will
+  lead to a cascade of further failures, blowing the runtime stack.
+  ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(),
+  which will usually make debugging easier.
+
+MALLOC_FAILURE_ACTION     default: sets errno to ENOMEM, or no-op on win32
+  The action to take before "return 0" when malloc fails to be able to
+  return memory because there is none available.
+
+HAVE_MORECORE             default: 1 (true) unless win32 or ONLY_MSPACES
+  True if this system supports sbrk or an emulation of it.
+
+MORECORE                  default: sbrk
+  The name of the sbrk-style system routine to call to obtain more
+  memory.  See below for guidance on writing custom MORECORE
+  functions. The type of the argument to sbrk/MORECORE varies across
+  systems.  It cannot be size_t, because it supports negative
+  arguments, so it is normally the signed type of the same width as
+  size_t (sometimes declared as "intptr_t").  It doesn't much matter
+  though. Internally, we only call it with arguments less than half
+  the max value of a size_t, which should work across all reasonable
+  possibilities, although sometimes generating compiler warnings.
+
+MORECORE_CONTIGUOUS       default: 1 (true) if HAVE_MORECORE
+  If true, take advantage of fact that consecutive calls to MORECORE
+  with positive arguments always return contiguous increasing
+  addresses.  This is true of unix sbrk. It does not hurt too much to
+  set it true anyway, since malloc copes with non-contiguities.
+  Setting it false when definitely non-contiguous saves time
+  and possibly wasted space it would take to discover this though.
+
+MORECORE_CANNOT_TRIM      default: NOT defined
+  True if MORECORE cannot release space back to the system when given
+  negative arguments. This is generally necessary only if you are
+  using a hand-crafted MORECORE function that cannot handle negative
+  arguments.
+
+NO_SEGMENT_TRAVERSAL       default: 0
+  If non-zero, suppresses traversals of memory segments
+  returned by either MORECORE or CALL_MMAP. This disables
+  merging of segments that are contiguous, and selectively
+  releasing them to the OS if unused, but bounds execution times.
+
+HAVE_MMAP                 default: 1 (true)
+  True if this system supports mmap or an emulation of it.  If so, and
+  HAVE_MORECORE is not true, MMAP is used for all system
+  allocation. If set and HAVE_MORECORE is true as well, MMAP is
+  primarily used to directly allocate very large blocks. It is also
+  used as a backup strategy in cases where MORECORE fails to provide
+  space from system. Note: A single call to MUNMAP is assumed to be
+  able to unmap memory that may have be allocated using multiple calls
+  to MMAP, so long as they are adjacent.
+
+HAVE_MREMAP               default: 1 on linux, else 0
+  If true realloc() uses mremap() to re-allocate large blocks and
+  extend or shrink allocation spaces.
+
+MMAP_CLEARS               default: 1 except on WINCE.
+  True if mmap clears memory so calloc doesn't need to. This is true
+  for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
+
+USE_BUILTIN_FFS            default: 0 (i.e., not used)
+  Causes malloc to use the builtin ffs() function to compute indices.
+  Some compilers may recognize and intrinsify ffs to be faster than the
+  supplied C version. Also, the case of x86 using gcc is special-cased
+  to an asm instruction, so is already as fast as it can be, and so
+  this setting has no effect. Similarly for Win32 under recent MS compilers.
+  (On most x86s, the asm version is only slightly faster than the C version.)
+
+malloc_getpagesize         default: derive from system includes, or 4096.
+  The system page size. To the extent possible, this malloc manages
+  memory from the system in page-size units.  This may be (and
+  usually is) a function rather than a constant. This is ignored
+  if WIN32, where page size is determined using getSystemInfo during
+  initialization. This may be several megabytes if ENABLE_LARGE_PAGES
+  is enabled.
+
+ENABLE_LARGE_PAGES         default: NOT defined
+  Causes the system page size to be the value of GetLargePageMinimum()
+  if that function is available (Windows Server 2003/Vista or later).
+  This allows the use of large page entries in the MMU which can
+  significantly improve performance in large working set applications
+  as TLB cache load is reduced by a factor of three. Note that enabling
+  this option is equal to locking the process' memory in current
+  implementations of Windows and requires the SE_LOCK_MEMORY_PRIVILEGE
+  to be held by the process in order to succeed.
+
+USE_DEV_RANDOM             default: 0 (i.e., not used)
+  Causes malloc to use /dev/random to initialize secure magic seed for
+  stamping footers. Otherwise, the current time is used.
+
+NO_MALLINFO                default: 0
+  If defined, don't compile "mallinfo". This can be a simple way
+  of dealing with mismatches between system declarations and
+  those in this file.
+
+MALLINFO_FIELD_TYPE        default: size_t
+  The type of the fields in the mallinfo struct. This was originally
+  defined as "int" in SVID etc, but is more usefully defined as
+  size_t. The value is used only if  HAVE_USR_INCLUDE_MALLOC_H is not set
+
+REALLOC_ZERO_BYTES_FREES    default: not defined
+  This should be set if a call to realloc with zero bytes should
+  be the same as a call to free. Some people think it should. Otherwise,
+  since this malloc returns a unique pointer for malloc(0), so does
+  realloc(p, 0).
+
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H,  LACKS_ERRNO_H
+LACKS_STDLIB_H                default: NOT defined unless on WIN32
+  Define these if your system does not have these header files.
+  You might need to manually insert some of the declarations they provide.
+
+DEFAULT_GRANULARITY        default: page size if MORECORE_CONTIGUOUS,
+                                system_info.dwAllocationGranularity in WIN32,
+                                GetLargePageMinimum() if ENABLE_LARGE_PAGES,
+                                otherwise 64K.
+      Also settable using mallopt(M_GRANULARITY, x)
+  The unit for allocating and deallocating memory from the system.  On
+  most systems with contiguous MORECORE, there is no reason to
+  make this more than a page. However, systems with MMAP tend to
+  either require or encourage larger granularities.  You can increase
+  this value to prevent system allocation functions to be called so
+  often, especially if they are slow.  The value must be at least one
+  page and must be a power of two.  Setting to 0 causes initialization
+  to either page size or win32 region size.  (Note: In previous
+  versions of malloc, the equivalent of this option was called
+  "TOP_PAD")
+
+DEFAULT_GRANULARITY_ALIGNED default: undefined (which means page size)
+  Whether to enforce alignment when allocating and deallocating memory
+  from the system i.e. the base address of all allocations will be
+  aligned to DEFAULT_GRANULARITY if it is set. Note that enabling this carries
+  some overhead as multiple calls must now be made when probing for a valid
+  aligned value, however it does greatly ease the checking for whether
+  a given memory pointer was allocated by this allocator rather than
+  some other.
+
+DEFAULT_TRIM_THRESHOLD    default: 2MB
+      Also settable using mallopt(M_TRIM_THRESHOLD, x)
+  The maximum amount of unused top-most memory to keep before
+  releasing via malloc_trim in free().  Automatic trimming is mainly
+  useful in long-lived programs using contiguous MORECORE.  Because
+  trimming via sbrk can be slow on some systems, and can sometimes be
+  wasteful (in cases where programs immediately afterward allocate
+  more large chunks) the value should be high enough so that your
+  overall system performance would improve by releasing this much
+  memory.  As a rough guide, you might set to a value close to the
+  average size of a process (program) running on your system.
+  Releasing this much memory would allow such a process to run in
+  memory.  Generally, it is worth tuning trim thresholds when a
+  program undergoes phases where several large chunks are allocated
+  and released in ways that can reuse each other's storage, perhaps
+  mixed with phases where there are no such chunks at all. The trim
+  value must be greater than page size to have any useful effect.  To
+  disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
+  some people use of mallocing a huge space and then freeing it at
+  program startup, in an attempt to reserve system memory, doesn't
+  have the intended effect under automatic trimming, since that memory
+  will immediately be returned to the system.
+
+DEFAULT_MMAP_THRESHOLD       default: 256K
+      Also settable using mallopt(M_MMAP_THRESHOLD, x)
+  The request size threshold for using MMAP to directly service a
+  request. Requests of at least this size that cannot be allocated
+  using already-existing space will be serviced via mmap.  (If enough
+  normal freed space already exists it is used instead.)  Using mmap
+  segregates relatively large chunks of memory so that they can be
+  individually obtained and released from the host system. A request
+  serviced through mmap is never reused by any other request (at least
+  not directly; the system may just so happen to remap successive
+  requests to the same locations).  Segregating space in this way has
+  the benefits that: Mmapped space can always be individually released
+  back to the system, which helps keep the system level memory demands
+  of a long-lived program low.  Also, mapped memory doesn't become
+  `locked' between other chunks, as can happen with normally allocated
+  chunks, which means that even trimming via malloc_trim would not
+  release them.  However, it has the disadvantage that the space
+  cannot be reclaimed, consolidated, and then used to service later
+  requests, as happens with normal chunks.  The advantages of mmap
+  nearly always outweigh disadvantages for "large" chunks, but the
+  value of "large" may vary across systems.  The default is an
+  empirically derived value that works well in most systems. You can
+  disable mmap by setting to MAX_SIZE_T.
+
+MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
+  The number of consolidated frees between checks to release
+  unused segments when freeing. When using non-contiguous segments,
+  especially with multiple mspaces, checking only for topmost space
+  doesn't always suffice to trigger trimming. To compensate for this,
+  free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
+  current number of segments, if greater) try to release unused
+  segments to the OS when freeing chunks that result in
+  consolidation. The best value for this parameter is a compromise
+  between slowing down frees with relatively costly checks that
+  rarely trigger versus holding on to unused memory. To effectively
+  disable, set to MAX_SIZE_T. This may lead to a very slight speed
+  improvement at the expense of carrying around more memory.
+*/
+
+/* Version identifier to allow people to support multiple versions */
+#ifndef DLMALLOC_VERSION
+#define DLMALLOC_VERSION 20804
+#endif /* DLMALLOC_VERSION */
+
+#ifndef WIN32
+#ifdef _WIN32
+#define WIN32 1
+#endif  /* _WIN32 */
+#ifdef _WIN32_WCE
+#define LACKS_FCNTL_H
+#define WIN32 1
+#endif /* _WIN32_WCE */
+#endif  /* WIN32 */
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+#define HAVE_MMAP 1
+#define HAVE_MORECORE 0
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+#define LACKS_STRING_H
+#define LACKS_STRINGS_H
+#define LACKS_SYS_TYPES_H
+#define LACKS_ERRNO_H
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION
+#endif /* MALLOC_FAILURE_ACTION */
+#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
+#define MMAP_CLEARS 0
+#else
+#define MMAP_CLEARS 1
+#endif /* _WIN32_WCE */
+#endif  /* WIN32 */
+
+#if defined(DARWIN) || defined(_DARWIN)
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+#ifndef HAVE_MORECORE
+#define HAVE_MORECORE 0
+#define HAVE_MMAP 1
+/* OSX allocators provide 16 byte alignment */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)16U)
+#endif
+#endif  /* HAVE_MORECORE */
+#endif  /* DARWIN */
+
+#ifndef LACKS_SYS_TYPES_H
+#include <sys/types.h>  /* For size_t */
+#endif  /* LACKS_SYS_TYPES_H */
+
+#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310)
+#define SPIN_LOCKS_AVAILABLE 1
+#else
+#define SPIN_LOCKS_AVAILABLE 0
+#endif
+
+/* The maximum possible size_t value has all bits set */
+#define MAX_SIZE_T           (~(size_t)0)
+
+#ifndef ONLY_MSPACES
+#define ONLY_MSPACES 0     /* define to a value */
+#else
+#define ONLY_MSPACES 1
+#endif  /* ONLY_MSPACES */
+#ifndef MSPACES
+#if ONLY_MSPACES
+#define MSPACES 1
+#else   /* ONLY_MSPACES */
+#define MSPACES 0
+#endif  /* ONLY_MSPACES */
+#endif  /* MSPACES */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)8U)
+#endif  /* MALLOC_ALIGNMENT */
+#ifndef FOOTERS
+#define FOOTERS 0
+#endif  /* FOOTERS */
+#ifndef ABORT
+#define ABORT  abort()
+#endif  /* ABORT */
+#ifndef ABORT_ON_ASSERT_FAILURE
+#define ABORT_ON_ASSERT_FAILURE 1
+#endif  /* ABORT_ON_ASSERT_FAILURE */
+#ifndef PROCEED_ON_ERROR
+#define PROCEED_ON_ERROR 0
+#endif  /* PROCEED_ON_ERROR */
+#ifndef USE_LOCKS
+#define USE_LOCKS 0
+#endif  /* USE_LOCKS */
+#ifndef USE_SPIN_LOCKS
+#if USE_LOCKS && SPIN_LOCKS_AVAILABLE
+#define USE_SPIN_LOCKS 1
+#else
+#define USE_SPIN_LOCKS 0
+#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */
+#endif /* USE_SPIN_LOCKS */
+#ifndef INSECURE
+#define INSECURE 0
+#endif  /* INSECURE */
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif  /* HAVE_MMAP */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif  /* MMAP_CLEARS */
+#ifndef HAVE_MREMAP
+#ifdef linux
+#define HAVE_MREMAP 1
+#else   /* linux */
+#define HAVE_MREMAP 0
+#endif  /* linux */
+#endif  /* HAVE_MREMAP */
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION  errno = ENOMEM;
+#endif  /* MALLOC_FAILURE_ACTION */
+#ifndef HAVE_MORECORE
+#if ONLY_MSPACES
+#define HAVE_MORECORE 0
+#else   /* ONLY_MSPACES */
+#define HAVE_MORECORE 1
+#endif  /* ONLY_MSPACES */
+#endif  /* HAVE_MORECORE */
+#if !HAVE_MORECORE
+#define MORECORE_CONTIGUOUS 0
+#else   /* !HAVE_MORECORE */
+#define MORECORE_DEFAULT sbrk
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif  /* MORECORE_CONTIGUOUS */
+#endif  /* HAVE_MORECORE */
+#ifndef DEFAULT_GRANULARITY
+#if (MORECORE_CONTIGUOUS || defined(WIN32))
+#define DEFAULT_GRANULARITY (0)  /* 0 means to compute in init_mparams */
+#else   /* MORECORE_CONTIGUOUS */
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
+#endif  /* MORECORE_CONTIGUOUS */
+#endif  /* DEFAULT_GRANULARITY */
+#ifndef DEFAULT_TRIM_THRESHOLD
+#ifndef MORECORE_CANNOT_TRIM
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
+#else   /* MORECORE_CANNOT_TRIM */
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
+#endif  /* MORECORE_CANNOT_TRIM */
+#endif  /* DEFAULT_TRIM_THRESHOLD */
+#ifndef DEFAULT_MMAP_THRESHOLD
+#if HAVE_MMAP
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
+#else   /* HAVE_MMAP */
+#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+#endif  /* HAVE_MMAP */
+#endif  /* DEFAULT_MMAP_THRESHOLD */
+#ifndef MAX_RELEASE_CHECK_RATE
+#if HAVE_MMAP
+#define MAX_RELEASE_CHECK_RATE 4095
+#else
+#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
+#endif /* HAVE_MMAP */
+#endif /* MAX_RELEASE_CHECK_RATE */
+#ifndef USE_BUILTIN_FFS
+#define USE_BUILTIN_FFS 0
+#endif  /* USE_BUILTIN_FFS */
+#ifndef USE_DEV_RANDOM
+#define USE_DEV_RANDOM 0
+#endif  /* USE_DEV_RANDOM */
+#ifndef NO_MALLINFO
+#define NO_MALLINFO 0
+#endif  /* NO_MALLINFO */
+#ifndef MALLINFO_FIELD_TYPE
+#define MALLINFO_FIELD_TYPE size_t
+#endif  /* MALLINFO_FIELD_TYPE */
+#ifndef NO_SEGMENT_TRAVERSAL
+#define NO_SEGMENT_TRAVERSAL 0
+#endif /* NO_SEGMENT_TRAVERSAL */
+
+/*
+  mallopt tuning options.  SVID/XPG defines four standard parameter
+  numbers for mallopt, normally defined in malloc.h.  None of these
+  are used in this malloc, so setting them has no effect. But this
+  malloc does support the following options.
+*/
+
+#define M_TRIM_THRESHOLD     (-1)
+#define M_GRANULARITY        (-2)
+#define M_MMAP_THRESHOLD     (-3)
+
+/* ------------------------ Mallinfo declarations ------------------------ */
+
+#if !NO_MALLINFO
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any system that has a
+  /usr/include/malloc.h defining struct mallinfo.  The main
+  declaration needed is the mallinfo struct that is returned (by-copy)
+  by mallinfo().  The malloinfo struct contains a bunch of fields that
+  are not even meaningful in this version of malloc.  These fields are
+  are instead filled by mallinfo() with other numbers that might be of
+  interest.
+
+  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+  /usr/include/malloc.h file that includes a declaration of struct
+  mallinfo.  If so, it is included; else a compliant version is
+  declared below.  These must be precisely the same for mallinfo() to
+  work.  The original SVID version of this struct, defined on most
+  systems with mallinfo, declares all fields as ints. But some others
+  define as unsigned long. If your system defines the fields using a
+  type of different width than listed here, you MUST #include your
+  system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+*/
+
+/* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+#ifdef HAVE_USR_INCLUDE_MALLOC_H
+#include "/usr/include/malloc.h"
+#else /* HAVE_USR_INCLUDE_MALLOC_H */
+#ifndef STRUCT_MALLINFO_DECLARED
+#define STRUCT_MALLINFO_DECLARED 1
+struct mallinfo {
+  MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
+  MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
+  MALLINFO_FIELD_TYPE smblks;   /* always 0 */
+  MALLINFO_FIELD_TYPE hblks;    /* always 0 */
+  MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
+  MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
+  MALLINFO_FIELD_TYPE fsmblks;  /* always 0 */
+  MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
+  MALLINFO_FIELD_TYPE fordblks; /* total free space */
+  MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
+};
+#endif /* STRUCT_MALLINFO_DECLARED */
+#endif /* HAVE_USR_INCLUDE_MALLOC_H */
+#endif /* NO_MALLINFO */
+
+/*
+  Try to persuade compilers to inline. The most critical functions for
+  inlining are defined as macros, so these aren't used for them.
+*/
+
+#ifndef FORCEINLINE
+  #if defined(__GNUC__)
+#define FORCEINLINE __inline __attribute__ ((always_inline))
+  #elif defined(_MSC_VER)
+    #define FORCEINLINE __forceinline
+  #endif
+#endif
+#ifndef NOINLINE
+  #if defined(__GNUC__)
+    #define NOINLINE __attribute__ ((noinline))
+  #elif defined(_MSC_VER)
+    #define NOINLINE __declspec(noinline)
+  #else
+    #define NOINLINE
+  #endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#ifndef FORCEINLINE
+ #define FORCEINLINE inline
+#endif
+#endif /* __cplusplus */
+#ifndef FORCEINLINE
+ #define FORCEINLINE
+#endif
+
+#if !ONLY_MSPACES
+
+/* ------------------- Declarations of public routines ------------------- */
+
+#ifndef USE_DL_PREFIX
+#define dlcalloc               calloc
+#define dlfree                 free
+#define dlmalloc               malloc
+#define dlmemalign             memalign
+#define dlrealloc              realloc
+#define dlvalloc               valloc
+#define dlpvalloc              pvalloc
+#define dlmallinfo             mallinfo
+#define dlmallopt              mallopt
+#define dlmalloc_trim          malloc_trim
+#define dlmalloc_stats         malloc_stats
+#define dlmalloc_usable_size   malloc_usable_size
+#define dlmalloc_footprint     malloc_footprint
+#define dlmalloc_max_footprint malloc_max_footprint
+#define dlindependent_calloc   independent_calloc
+#define dlindependent_comalloc independent_comalloc
+#endif /* USE_DL_PREFIX */
+
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or
+  null if no space is available, in which case errno is set to ENOMEM
+  on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
+  systems.)  Note that size_t is an unsigned type, so calls with
+  arguments that would be negative if signed are interpreted as
+  requests for huge amounts of space, which will often fail. The
+  maximum supported value of n differs across systems, but is in all
+  cases less than the maximum representable value of a size_t.
+*/
+void* dlmalloc(size_t);
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. If p was not malloced or already
+  freed, free(p) will by default cause the current program to abort.
+*/
+void  dlfree(void*);
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+void* dlcalloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p in most cases when possible, otherwise it
+  employs the equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  if n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  realloc with a size
+  argument of zero (re)allocates a minimum-sized chunk.
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+
+void* dlrealloc(void*, size_t);
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+void* dlmemalign(size_t, size_t);
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+void* dlvalloc(size_t);
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  To workaround the fact that mallopt is specified to use int,
+  not size_t parameters, the value -1 is specially treated as the
+  maximum unsigned size_t value.
+
+  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  None of these are use in this malloc,
+  so setting them has no effect. But this malloc also supports other
+  options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #  default    allowed param values
+  M_TRIM_THRESHOLD     -1   2*1024*1024   any   (-1 disables)
+  M_GRANULARITY        -2     page size   any power of 2 >= page size
+  M_MMAP_THRESHOLD     -3      256*1024   any   (or 0 if no MMAP support)
+*/
+int dlmallopt(int, int);
+
+/*
+  malloc_footprint();
+  Returns the number of bytes obtained from the system.  The total
+  number of bytes allocated by malloc, realloc etc., is less than this
+  value. Unlike mallinfo, this function returns only a precomputed
+  result, so can be called frequently to monitor memory consumption.
+  Even if locks are otherwise defined, this function does not use them,
+  so results might not be up to date.
+*/
+size_t dlmalloc_footprint(void);
+
+/*
+  malloc_max_footprint();
+  Returns the maximum number of bytes obtained from the system. This
+  value will be greater than current footprint if deallocated space
+  has been reclaimed by the system. The peak number of bytes allocated
+  by malloc, realloc etc., is less than this value. Unlike mallinfo,
+  this function returns only a precomputed result, so can be called
+  frequently to monitor memory consumption.  Even if locks are
+  otherwise defined, this function does not use them, so results might
+  not be up to date.
+*/
+size_t dlmalloc_max_footprint(void);
+
+#if !NO_MALLINFO
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    always zero.
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+                than current total if trimming has occurred.
+  fsmblks:   always zero
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+               back to system via malloc_trim. ("ideally" means that
+               it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+struct mallinfo dlmallinfo(void);
+#endif /* NO_MALLINFO */
+
+/*
+  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+  independent_calloc is similar to calloc, but instead of returning a
+  single cleared space, it returns an array of pointers to n_elements
+  independent elements that can hold contents of size elem_size, each
+  of which starts out cleared, and can be independently freed,
+  realloc'ed etc. The elements are guaranteed to be adjacently
+  allocated (this is not guaranteed to occur with multiple callocs or
+  mallocs), which may also improve cache locality in some
+  applications.
+
+  The "chunks" argument is optional (i.e., may be null, which is
+  probably the most typical usage). If it is null, the returned array
+  is itself dynamically allocated and should also be freed when it is
+  no longer needed. Otherwise, the chunks array must be of at least
+  n_elements in length. It is filled in with the pointers to the
+  chunks.
+
+  In either case, independent_calloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and "chunks"
+  is null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use regular calloc and assign pointers into this
+  space to represent elements.  (In this case though, you cannot
+  independently free elements.)
+
+  independent_calloc simplifies and speeds up implementations of many
+  kinds of pools.  It may also be useful when constructing large data
+  structures that initially have a fixed number of fixed-sized nodes,
+  but the number is not known at compile time, and some of the nodes
+  may later need to be freed. For example:
+
+  struct Node { int item; struct Node* next; };
+
+  struct Node* build_list() {
+    struct Node** pool;
+    int n = read_number_of_nodes_needed();
+    if (n <= 0) return 0;
+    pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0);
+    if (pool == 0) die();
+    // organize into a linked list...
+    struct Node* first = pool[0];
+    for (i = 0; i < n-1; ++i)
+      pool[i]->next = pool[i+1];
+    free(pool);     // Can now free the array (or not, if it is needed later)
+    return first;
+  }
+*/
+void** dlindependent_calloc(size_t, size_t, void**);
+
+/*
+  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+  independent_comalloc allocates, all at once, a set of n_elements
+  chunks with sizes indicated in the "sizes" array.    It returns
+  an array of pointers to these elements, each of which can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null). If it is null
+  the returned array is itself dynamically allocated and should also
+  be freed when it is no longer needed. Otherwise, the chunks array
+  must be of at least n_elements in length. It is filled in with the
+  pointers to the chunks.
+
+  In either case, independent_comalloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and chunks is
+  null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be individually freed when it is no longer
+  needed. If you'd like to instead be able to free all at once, you
+  should instead use a single regular malloc, and assign pointers at
+  particular offsets in the aggregate space. (In this case though, you
+  cannot independently free elements.)
+
+  independent_comallac differs from independent_calloc in that each
+  element may have a different size, and also that it does not
+  automatically clear elements.
+
+  independent_comalloc can be used to speed up allocation in cases
+  where several structs or objects must always be allocated at the
+  same time.  For example:
+
+  struct Head { ... }
+  struct Foot { ... }
+
+  void send_message(char* msg) {
+    int msglen = strlen(msg);
+    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+    void* chunks[3];
+    if (independent_comalloc(3, sizes, chunks) == 0)
+      die();
+    struct Head* head = (struct Head*)(chunks[0]);
+    char*        body = (char*)(chunks[1]);
+    struct Foot* foot = (struct Foot*)(chunks[2]);
+    // ...
+  }
+
+  In general though, independent_comalloc is worth using only for
+  larger values of n_elements. For small values, you probably won't
+  detect enough difference from series of malloc calls to bother.
+
+  Overuse of independent_comalloc can increase overall memory usage,
+  since it cannot reuse existing noncontiguous small chunks that
+  might be available for some of the elements.
+*/
+void** dlindependent_comalloc(size_t, size_t*, void**);
+
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+void*  dlpvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative arguments
+  to sbrk) if there is unused memory at the `high' end of the malloc
+  pool or in unused MMAP segments. You can call this after freeing
+  large blocks of memory to potentially reduce the system-level memory
+  requirements of a program. However, it cannot guarantee to reduce
+  memory. Under some allocation patterns, some large free blocks of
+  memory will be locked between two used chunks, so they cannot be
+  given back to the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero, only
+  the minimum amount of memory to maintain internal data structures
+  will be left. Non-zero arguments can be supplied to maintain enough
+  trailing space to service future expected allocations without having
+  to re-obtain memory from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+*/
+int  dlmalloc_trim(size_t);
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+*/
+void  dlmalloc_stats(void);
+
+#endif /* ONLY_MSPACES */
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+*/
+size_t dlmalloc_usable_size(void*);
+
+
+#if MSPACES
+
+/*
+  mspace is an opaque type representing an independent
+  region of space that supports mspace_malloc, etc.
+*/
+typedef void* mspace;
+
+/*
+  create_mspace creates and returns a new independent space with the
+  given initial capacity, or, if 0, the default granularity size.  It
+  returns null if there is no system memory available to create the
+  space.  If argument locked is non-zero, the space uses a separate
+  lock to control access. The capacity of the space will grow
+  dynamically as needed to service mspace_malloc requests.  You can
+  control the sizes of incremental increases of this space by
+  compiling with a different DEFAULT_GRANULARITY or dynamically
+  setting with mallopt(M_GRANULARITY, value).
+*/
+mspace create_mspace(size_t capacity, int locked);
+
+/*
+  destroy_mspace destroys the given space, and attempts to return all
+  of its memory back to the system, returning the total number of
+  bytes freed. After destruction, the results of access to all memory
+  used by the space become undefined.
+*/
+size_t destroy_mspace(mspace msp);
+
+/*
+  create_mspace_with_base uses the memory supplied as the initial base
+  of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
+  space is used for bookkeeping, so the capacity must be at least this
+  large. (Otherwise 0 is returned.) When this initial space is
+  exhausted, additional memory will be obtained from the system.
+  Destroying this space will deallocate all additionally allocated
+  space (if possible) but not the initial base.
+*/
+mspace create_mspace_with_base(void* base, size_t capacity, int locked);
+
+/*
+  mspace_track_large_chunks controls whether requests for large chunks
+  are allocated in their own untracked mmapped regions, separate from
+  others in this mspace. By default large chunks are not tracked,
+  which reduces fragmentation. However, such chunks are not
+  necessarily released to the system upon destroy_mspace.  Enabling
+  tracking by setting to true may increase fragmentation, but avoids
+  leakage when relying on destroy_mspace to release all memory
+  allocated using this space.  The function returns the previous
+  setting.
+*/
+int mspace_track_large_chunks(mspace msp, int enable);
+
+
+/*
+  mspace_malloc behaves as malloc, but operates within
+  the given space.
+*/
+void* mspace_malloc(mspace msp, size_t bytes);
+
+/*
+  mspace_free behaves as free, but operates within
+  the given space.
+
+  If compiled with FOOTERS==1, mspace_free is not actually needed.
+  free may be called instead of mspace_free because freed chunks from
+  any space are handled by their originating spaces.
+*/
+void mspace_free(mspace msp, void* mem);
+
+/*
+  mspace_realloc behaves as realloc, but operates within
+  the given space.
+
+  If compiled with FOOTERS==1, mspace_realloc is not actually
+  needed.  realloc may be called instead of mspace_realloc because
+  realloced chunks from any space are handled by their originating
+  spaces.
+*/
+void* mspace_realloc(mspace msp, void* mem, size_t newsize);
+
+/*
+  mspace_calloc behaves as calloc, but operates within
+  the given space.
+*/
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
+
+/*
+  mspace_memalign behaves as memalign, but operates within
+  the given space.
+*/
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
+
+/*
+  mspace_independent_calloc behaves as independent_calloc, but
+  operates within the given space.
+*/
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+                                 size_t elem_size, void* chunks[]);
+
+/*
+  mspace_independent_comalloc behaves as independent_comalloc, but
+  operates within the given space.
+*/
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+                                   size_t sizes[], void* chunks[]);
+
+/*
+  mspace_footprint() returns the number of bytes obtained from the
+  system for this space.
+*/
+size_t mspace_footprint(mspace msp);
+
+/*
+  mspace_max_footprint() returns the peak number of bytes obtained from the
+  system for this space.
+*/
+size_t mspace_max_footprint(mspace msp);
+
+
+#if !NO_MALLINFO
+/*
+  mspace_mallinfo behaves as mallinfo, but reports properties of
+  the given space.
+*/
+struct mallinfo mspace_mallinfo(mspace msp);
+#endif /* NO_MALLINFO */
+
+/*
+  malloc_usable_size(void* p) behaves the same as malloc_usable_size;
+*/
+  size_t mspace_usable_size(void* mem);
+
+/*
+  mspace_malloc_stats behaves as malloc_stats, but reports
+  properties of the given space.
+*/
+void mspace_malloc_stats(mspace msp);
+
+/*
+  mspace_trim behaves as malloc_trim, but
+  operates within the given space.
+*/
+int mspace_trim(mspace msp, size_t pad);
+
+/*
+  An alias for mallopt.
+*/
+int mspace_mallopt(int, int);
+
+#endif /* MSPACES */
+
+#ifdef __cplusplus
+}  /* end of extern "C" */
+#endif /* __cplusplus */
+
+/*
+  ========================================================================
+  To make a fully customizable malloc.h header file, cut everything
+  above this line, put into file malloc.h, edit to suit, and #include it
+  on the next line, as well as in programs that use this malloc.
+  ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/*------------------------------ internal #includes ---------------------- */
+
+#ifdef WIN32
+#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
+#endif /* WIN32 */
+
+#include <stdio.h>       /* for printing in malloc_stats */
+
+#ifndef LACKS_ERRNO_H
+#include <errno.h>       /* for MALLOC_FAILURE_ACTION */
+#endif /* LACKS_ERRNO_H */
+#if FOOTERS || DEBUG
+#include <time.h>        /* for magic initialization */
+#endif /* FOOTERS */
+#ifndef LACKS_STDLIB_H
+#include <stdlib.h>      /* for abort() */
+#endif /* LACKS_STDLIB_H */
+#ifdef DEBUG
+#if ABORT_ON_ASSERT_FAILURE
+#undef assert
+#define assert(x) if(!(x)) ABORT
+#else /* ABORT_ON_ASSERT_FAILURE */
+#include <assert.h>
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#else  /* DEBUG */
+#ifndef assert
+#define assert(x)
+#endif
+#define DEBUG 0
+#endif /* DEBUG */
+#ifndef LACKS_STRING_H
+#include <string.h>      /* for memset etc */
+#endif  /* LACKS_STRING_H */
+#if USE_BUILTIN_FFS
+#ifndef LACKS_STRINGS_H
+#include <strings.h>     /* for ffs */
+#endif /* LACKS_STRINGS_H */
+#endif /* USE_BUILTIN_FFS */
+#if HAVE_MMAP
+#ifndef LACKS_SYS_MMAN_H
+/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
+#if (defined(linux) && !defined(__USE_GNU))
+#define __USE_GNU 1
+#include <sys/mman.h>    /* for mmap */
+#undef __USE_GNU
+#else
+#include <sys/mman.h>    /* for mmap */
+#endif /* linux */
+#endif /* LACKS_SYS_MMAN_H */
+#ifndef LACKS_FCNTL_H
+#include <fcntl.h>
+#endif /* LACKS_FCNTL_H */
+#endif /* HAVE_MMAP */
+#ifndef LACKS_UNISTD_H
+#include <unistd.h>     /* for sbrk, sysconf */
+#else /* LACKS_UNISTD_H */
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+extern void*     sbrk(ptrdiff_t);
+#endif /* FreeBSD etc */
+#endif /* LACKS_UNISTD_H */
+
+/* Declarations for locking */
+#if USE_LOCKS
+#ifndef WIN32
+#include <pthread.h>
+#if defined (__SVR4) && defined (__sun)  /* solaris */
+#include <thread.h>
+#endif /* solaris */
+#else
+#ifndef _M_AMD64
+/* These are already defined on AMD64 builds */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp);
+LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _M_AMD64 */
+#pragma intrinsic (_InterlockedCompareExchange)
+#pragma intrinsic (_InterlockedExchange)
+#define interlockedcompareexchange _InterlockedCompareExchange
+#define interlockedexchange _InterlockedExchange
+#endif /* Win32 */
+#endif /* USE_LOCKS */
+
+/* Declarations for bit scanning on win32 */
+#if defined(_MSC_VER) && _MSC_VER>=1300
+#ifndef BitScanForward	/* Try to avoid pulling in WinNT.h */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
+unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#define BitScanForward _BitScanForward
+#define BitScanReverse _BitScanReverse
+#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
+#endif /* BitScanForward */
+#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
+
+#ifndef WIN32
+#ifndef malloc_getpagesize
+#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
+#    ifndef _SC_PAGE_SIZE
+#      define _SC_PAGE_SIZE _SC_PAGESIZE
+#    endif
+#  endif
+#  ifdef _SC_PAGE_SIZE
+#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#  else
+#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+       extern size_t getpagesize();
+#      define malloc_getpagesize getpagesize()
+#    else
+#      ifdef WIN32 /* use supplied emulation of getpagesize */
+#        define malloc_getpagesize getpagesize()
+#      else
+#        ifndef LACKS_SYS_PARAM_H
+#          include <sys/param.h>
+#        endif
+#        ifdef EXEC_PAGESIZE
+#          define malloc_getpagesize EXEC_PAGESIZE
+#        else
+#          ifdef NBPG
+#            ifndef CLSIZE
+#              define malloc_getpagesize NBPG
+#            else
+#              define malloc_getpagesize (NBPG * CLSIZE)
+#            endif
+#          else
+#            ifdef NBPC
+#              define malloc_getpagesize NBPC
+#            else
+#              ifdef PAGESIZE
+#                define malloc_getpagesize PAGESIZE
+#              else /* just guess */
+#                define malloc_getpagesize ((size_t)4096U)
+#              endif
+#            endif
+#          endif
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+#endif
+
+
+
+/* ------------------- size_t and alignment properties -------------------- */
+
+/* The byte and bit size of a size_t */
+#define SIZE_T_SIZE         (sizeof(size_t))
+#define SIZE_T_BITSIZE      (sizeof(size_t) << 3)
+
+/* Some constants coerced to size_t */
+/* Annoying but necessary to avoid errors on some platforms */
+#define SIZE_T_ZERO         ((size_t)0)
+#define SIZE_T_ONE          ((size_t)1)
+#define SIZE_T_TWO          ((size_t)2)
+#define SIZE_T_FOUR         ((size_t)4)
+#define TWO_SIZE_T_SIZES    (SIZE_T_SIZE<<1)
+#define FOUR_SIZE_T_SIZES   (SIZE_T_SIZE<<2)
+#define SIX_SIZE_T_SIZES    (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
+#define HALF_MAX_SIZE_T     (MAX_SIZE_T / 2U)
+
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
+#define CHUNK_ALIGN_MASK    (MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+/* True if address a has acceptable alignment */
+#define is_aligned(A)       (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
+
+/* the number of bytes to offset an address to align it */
+#define align_offset(A)\
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
+  ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
+
+/*
+  malloc_params holds global properties, including those that can be
+  dynamically set using mallopt. There is a single instance, mparams,
+  initialized in init_mparams. Note that the non-zeroness of "magic"
+  also serves as an initialization flag.
+*/
+typedef unsigned int flag_t;
+struct malloc_params {
+  volatile size_t magic;
+  size_t page_size;
+  size_t granularity;
+  size_t mmap_threshold;
+  size_t trim_threshold;
+  flag_t default_mflags;
+};
+
+static struct malloc_params mparams;
+
+/* Ensure mparams initialized */
+#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
+
+/* -------------------------- MMAP preliminaries ------------------------- */
+
+/*
+   If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
+   checks to fail so compiler optimizer can delete code rather than
+   using so many "#if"s.
+*/
+
+
+/* MORECORE and MMAP must return MFAIL on failure */
+#define MFAIL                ((void*)(MAX_SIZE_T))
+#define CMFAIL               ((char*)(MFAIL)) /* defined for convenience */
+
+#if HAVE_MMAP
+
+#ifndef WIN32
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS        MAP_ANON
+#endif /* MAP_ANON */
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+#define MMAP_IMPL mmap_aligned
+static void* lastAlignedmmap; /* Used as a hint */
+static void* mmap_aligned(void *start, size_t length, int prot, int flags, int fd, off_t offset) {
+  void* baseaddress = 0;
+  void* ptr = 0;
+  if(!start) {
+    baseaddress = lastAlignedmmap;
+    for(;;) {
+      if(baseaddress) flags|=MAP_FIXED;
+      ptr = mmap(baseaddress, length, prot, flags, fd, offset);
+      if(!ptr)
+        baseaddress = (void*)((size_t)baseaddress + mparams.granularity);
+      else if((size_t)ptr & (mparams.granularity - SIZE_T_ONE)) {
+        munmap(ptr, length);
+        baseaddress = (void*)(((size_t)ptr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE));
+      }
+      else break;
+    }
+  }
+  else ptr = mmap(start, length, prot, flags, fd, offset);
+  if(ptr) lastAlignedmmap = (void*)((size_t) ptr + mparams.granularity);
+  return ptr;
+}
+#else
+#define MMAP_IMPL mmap
+#endif /* DEFAULT_GRANULARITY_ALIGNED */
+#define MUNMAP_DEFAULT(a, s)  munmap((a), (s))
+#define MMAP_PROT            (PROT_READ|PROT_WRITE)
+#ifdef MAP_ANONYMOUS
+#define MMAP_FLAGS           (MAP_PRIVATE|MAP_ANONYMOUS)
+#define MMAP_DEFAULT(s)       MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
+#else /* MAP_ANONYMOUS */
+/*
+   Nearly all versions of mmap support MAP_ANONYMOUS, so the following
+   is unlikely to be needed, but is supplied just in case.
+*/
+#define MMAP_FLAGS           (MAP_PRIVATE)
+static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
+#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
+           (dev_zero_fd = open("/dev/zero", O_RDWR), \
+            MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
+            MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
+#endif /* MAP_ANONYMOUS */
+
+#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
+
+#else /* WIN32 */
+
+/* Win32 MMAP via VirtualAlloc */
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+static void* lastWin32mmap; /* Used as a hint */
+#endif /* DEFAULT_GRANULARITY_ALIGNED */
+#ifdef ENABLE_LARGE_PAGES
+static int largepagesavailable = 1;
+#endif /* ENABLE_LARGE_PAGES */
+static FORCEINLINE void* win32mmap(size_t size) {
+  void* baseaddress = 0;
+  void* ptr = 0;
+#ifdef ENABLE_LARGE_PAGES
+  /* Note that large pages are *always* allocated on a large page boundary.
+  If however granularity is small then don't waste a kernel call if size
+  isn't around the size of a large page */
+  if(largepagesavailable && size >= 1*1024*1024) {
+    ptr = VirtualAlloc(baseaddress, size, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE);
+    if(!ptr && ERROR_PRIVILEGE_NOT_HELD==GetLastError()) largepagesavailable=0;
+  }
+#endif
+  if(!ptr) {
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+    /* We try to avoid overhead by speculatively reserving at aligned
+    addresses until we succeed */
+    baseaddress = lastWin32mmap;
+    for(;;) {
+      void* reserveaddr = VirtualAlloc(baseaddress, size, MEM_RESERVE, PAGE_READWRITE);
+      if(!reserveaddr)
+        baseaddress = (void*)((size_t)baseaddress + mparams.granularity);
+      else if((size_t)reserveaddr & (mparams.granularity - SIZE_T_ONE)) {
+        VirtualFree(reserveaddr, 0, MEM_RELEASE);
+        baseaddress = (void*)(((size_t)reserveaddr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE));
+      }
+      else break;
+    }
+#endif
+    if(!ptr) ptr = VirtualAlloc(baseaddress, size, baseaddress ? MEM_COMMIT : MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+#if DEBUG
+    if(lastWin32mmap && ptr!=lastWin32mmap) printf("Non-contiguous VirtualAlloc between %p and %p\n", ptr, lastWin32mmap);
+#endif
+#ifdef DEFAULT_GRANULARITY_ALIGNED
+    if(ptr) lastWin32mmap = (void*)((size_t) ptr + mparams.granularity);
+#endif
+  }
+#if DEBUG
+#ifdef ENABLE_LARGE_PAGES
+  printf("VirtualAlloc returns %p size %u. LargePagesAvailable=%d\n", ptr, size, largepagesavailable);
+#else
+  printf("VirtualAlloc returns %p size %u\n", ptr, size);
+#endif
+#endif
+  return (ptr != 0)? ptr: MFAIL;
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+static FORCEINLINE void* win32direct_mmap(size_t size) {
+  void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+                           PAGE_READWRITE);
+  return (ptr != 0)? ptr: MFAIL;
+}
+
+/* This function supports releasing coalesed segments */
+static FORCEINLINE int win32munmap(void* ptr, size_t size) {
+  MEMORY_BASIC_INFORMATION minfo;
+  char* cptr = (char*)ptr;
+  while (size) {
+    if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
+      return -1;
+    if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
+        minfo.State != MEM_COMMIT || minfo.RegionSize > size)
+      return -1;
+    if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
+      return -1;
+    cptr += minfo.RegionSize;
+    size -= minfo.RegionSize;
+  }
+  return 0;
+}
+
+#define MMAP_DEFAULT(s)             win32mmap(s)
+#define MUNMAP_DEFAULT(a, s)        win32munmap((a), (s))
+#define DIRECT_MMAP_DEFAULT(s)      win32direct_mmap(s)
+#endif /* WIN32 */
+#endif /* HAVE_MMAP */
+
+#if HAVE_MREMAP
+#ifndef WIN32
+#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
+#endif /* WIN32 */
+#endif /* HAVE_MREMAP */
+
+
+/**
+ * Define CALL_MORECORE
+ */
+#if HAVE_MORECORE
+    #ifdef MORECORE
+        #define CALL_MORECORE(S)    MORECORE(S)
+    #else  /* MORECORE */
+        #define CALL_MORECORE(S)    MORECORE_DEFAULT(S)
+    #endif /* MORECORE */
+#else  /* HAVE_MORECORE */
+    #define CALL_MORECORE(S)        MFAIL
+#endif /* HAVE_MORECORE */
+
+/**
+ * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
+ */
+#if HAVE_MMAP
+    #define USE_MMAP_BIT            (SIZE_T_ONE)
+
+    #ifdef MMAP
+        #define CALL_MMAP(s)        MMAP(s)
+    #else /* MMAP */
+        #define CALL_MMAP(s)        MMAP_DEFAULT(s)
+    #endif /* MMAP */
+    #ifdef MUNMAP
+        #define CALL_MUNMAP(a, s)   MUNMAP((a), (s))
+    #else /* MUNMAP */
+        #define CALL_MUNMAP(a, s)   MUNMAP_DEFAULT((a), (s))
+    #endif /* MUNMAP */
+    #ifdef DIRECT_MMAP
+        #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+    #else /* DIRECT_MMAP */
+        #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
+    #endif /* DIRECT_MMAP */
+#else  /* HAVE_MMAP */
+    #define USE_MMAP_BIT            (SIZE_T_ZERO)
+
+    #define MMAP(s)                 MFAIL
+    #define MUNMAP(a, s)            (-1)
+    #define DIRECT_MMAP(s)          MFAIL
+    #define CALL_DIRECT_MMAP(s)     DIRECT_MMAP(s)
+    #define CALL_MMAP(s)            MMAP(s)
+    #define CALL_MUNMAP(a, s)       MUNMAP((a), (s))
+#endif /* HAVE_MMAP */
+
+/**
+ * Define CALL_MREMAP
+ */
+#if HAVE_MMAP && HAVE_MREMAP
+    #ifdef MREMAP
+        #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
+    #else /* MREMAP */
+        #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
+    #endif /* MREMAP */
+#else  /* HAVE_MMAP && HAVE_MREMAP */
+    #define CALL_MREMAP(addr, osz, nsz, mv)     MFAIL
+#endif /* HAVE_MMAP && HAVE_MREMAP */
+
+/* mstate bit set if continguous morecore disabled or failed */
+#define USE_NONCONTIGUOUS_BIT (4U)
+
+/* segment bit set in create_mspace_with_base */
+#define EXTERN_BIT            (8U)
+
+
+/* --------------------------- Lock preliminaries ------------------------ */
+
+/*
+  When locks are defined, there is one global lock, plus
+  one per-mspace lock.
+
+  The global lock_ensures that mparams.magic and other unique
+  mparams values are initialized only once. It also protects
+  sequences of calls to MORECORE.  In many cases sys_alloc requires
+  two calls, that should not be interleaved with calls by other
+  threads.  This does not protect against direct calls to MORECORE
+  by other threads not using this lock, so there is still code to
+  cope the best we can on interference.
+
+  Per-mspace locks surround calls to malloc, free, etc.  To enable use
+  in layered extensions, per-mspace locks are reentrant.
+
+  Because lock-protected regions generally have bounded times, it is
+  OK to use the supplied simple spinlocks in the custom versions for
+  x86. Spinlocks are likely to improve performance for lightly
+  contended applications, but worsen performance under heavy
+  contention.
+
+  If USE_LOCKS is > 1, the definitions of lock routines here are
+  bypassed, in which case you will need to define the type MLOCK_T,
+  and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly
+  TRY_LOCK (which is not used in this malloc, but commonly needed in
+  extensions.)  You must also declare a
+    static MLOCK_T malloc_global_mutex = { initialization values };.
+
+*/
+
+#if USE_LOCKS == 1
+
+#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE
+#ifndef WIN32
+
+/* Custom pthread-style spin locks on x86 and x64 for gcc */
+struct pthread_mlock_t {
+  volatile unsigned int l;
+  char cachelinepadding[64];
+  unsigned int c;
+  pthread_t threadid;
+};
+#define MLOCK_T               struct pthread_mlock_t
+#define CURRENT_THREAD        pthread_self()
+#define INITIAL_LOCK(sl)      ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
+#define ACQUIRE_LOCK(sl)      pthread_acquire_lock(sl)
+#define RELEASE_LOCK(sl)      pthread_release_lock(sl)
+#define TRY_LOCK(sl)          pthread_try_lock(sl)
+#define SPINS_PER_YIELD       63
+
+static MLOCK_T malloc_global_mutex = { 0, "", 0, 0};
+
+static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) {
+  int spins = 0;
+  volatile unsigned int* lp = &sl->l;
+  for (;;) {
+    if (*lp != 0) {
+      if (sl->threadid == CURRENT_THREAD) {
+        ++sl->c;
+        return 0;
+      }
+    }
+    else {
+      /* place args to cmpxchgl in locals to evade oddities in some gccs */
+      int cmp = 0;
+      int val = 1;
+      int ret;
+      __asm__ __volatile__  ("lock; cmpxchgl %1, %2"
+                             : "=a" (ret)
+                             : "r" (val), "m" (*(lp)), "0"(cmp)
+                             : "memory", "cc");
+      if (!ret) {
+        assert(!sl->threadid);
+        sl->threadid = CURRENT_THREAD;
+        sl->c = 1;
+        return 0;
+      }
+    }
+    if ((++spins & SPINS_PER_YIELD) == 0) {
+#if defined (__SVR4) && defined (__sun) /* solaris */
+      thr_yield();
+#else
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
+      sched_yield();
+#else  /* no-op yield on unknown systems */
+      ;
+#endif /* __linux__ || __FreeBSD__ || __APPLE__ */
+#endif /* solaris */
+    }
+  }
+}
+
+static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) {
+  volatile unsigned int* lp = &sl->l;
+  assert(*lp != 0);
+  assert(sl->threadid == CURRENT_THREAD);
+  if (--sl->c == 0) {
+    sl->threadid = 0;
+    int prev = 0;
+    int ret;
+    __asm__ __volatile__ ("lock; xchgl %0, %1"
+                          : "=r" (ret)
+                          : "m" (*(lp)), "0"(prev)
+                          : "memory");
+  }
+}
+
+static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) {
+  volatile unsigned int* lp = &sl->l;
+  if (*lp != 0) {
+    if (sl->threadid == CURRENT_THREAD) {
+      ++sl->c;
+      return 1;
+    }
+  }
+  else {
+    int cmp = 0;
+    int val = 1;
+    int ret;
+    __asm__ __volatile__  ("lock; cmpxchgl %1, %2"
+                           : "=a" (ret)
+                           : "r" (val), "m" (*(lp)), "0"(cmp)
+                           : "memory", "cc");
+    if (!ret) {
+      assert(!sl->threadid);
+      sl->threadid = CURRENT_THREAD;
+      sl->c = 1;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+
+#else /* WIN32 */
+/* Custom win32-style spin locks on x86 and x64 for MSC */
+struct win32_mlock_t {
+  volatile long l;
+  char cachelinepadding[64];
+  unsigned int c;
+  long threadid;
+};
+
+#define MLOCK_T               struct win32_mlock_t
+#define CURRENT_THREAD        ((long)GetCurrentThreadId())
+#define INITIAL_LOCK(sl)      ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
+#define ACQUIRE_LOCK(sl)      win32_acquire_lock(sl)
+#define RELEASE_LOCK(sl)      win32_release_lock(sl)
+#define TRY_LOCK(sl)          win32_try_lock(sl)
+#define SPINS_PER_YIELD       63
+
+static MLOCK_T malloc_global_mutex = { 0, 0, 0};
+
+static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) {
+  int spins = 0;
+  for (;;) {
+    if (sl->l != 0) {
+      if (sl->threadid == CURRENT_THREAD) {
+        ++sl->c;
+        return 0;
+      }
+    }
+    else {
+      if (!interlockedexchange(&sl->l, 1)) {
+        assert(!sl->threadid);
+        sl->threadid = CURRENT_THREAD;
+        sl->c = 1;
+        return 0;
+      }
+    }
+    if ((++spins & SPINS_PER_YIELD) == 0)
+      SleepEx(0, FALSE);
+  }
+}
+
+static FORCEINLINE void win32_release_lock (MLOCK_T *sl) {
+  assert(sl->threadid == CURRENT_THREAD);
+  assert(sl->l != 0);
+  if (--sl->c == 0) {
+    sl->threadid = 0;
+    interlockedexchange (&sl->l, 0);
+  }
+}
+
+static FORCEINLINE int win32_try_lock (MLOCK_T *sl) {
+  if (sl->l != 0) {
+    if (sl->threadid == CURRENT_THREAD) {
+      ++sl->c;
+      return 1;
+    }
+  }
+  else {
+    if (!interlockedexchange(&sl->l, 1)){
+      assert(!sl->threadid);
+      sl->threadid = CURRENT_THREAD;
+      sl->c = 1;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+#endif /* WIN32 */
+#else /* USE_SPIN_LOCKS */
+
+#ifndef WIN32
+/* pthreads-based locks */
+
+#define MLOCK_T               pthread_mutex_t
+#define CURRENT_THREAD        pthread_self()
+#define INITIAL_LOCK(sl)      pthread_init_lock(sl)
+#define ACQUIRE_LOCK(sl)      pthread_mutex_lock(sl)
+#define RELEASE_LOCK(sl)      pthread_mutex_unlock(sl)
+#define TRY_LOCK(sl)          (!pthread_mutex_trylock(sl))
+
+static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Cope with old-style linux recursive lock initialization by adding */
+/* skipped internal declaration from pthread.h */
+#ifdef linux
+#ifndef PTHREAD_MUTEX_RECURSIVE
+extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
+					   int __kind));
+#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
+#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
+#endif
+#endif
+
+static int pthread_init_lock (MLOCK_T *sl) {
+  pthread_mutexattr_t attr;
+  if (pthread_mutexattr_init(&attr)) return 1;
+  if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1;
+  if (pthread_mutex_init(sl, &attr)) return 1;
+  if (pthread_mutexattr_destroy(&attr)) return 1;
+  return 0;
+}
+
+#else /* WIN32 */
+/* Win32 critical sections */
+#define MLOCK_T               CRITICAL_SECTION
+#define CURRENT_THREAD        GetCurrentThreadId()
+#define INITIAL_LOCK(s)       (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000))
+#define ACQUIRE_LOCK(s)       (EnterCriticalSection(sl), 0)
+#define RELEASE_LOCK(s)       LeaveCriticalSection(sl)
+#define TRY_LOCK(s)           TryEnterCriticalSection(sl)
+#define NEED_GLOBAL_LOCK_INIT
+
+static MLOCK_T malloc_global_mutex;
+static volatile long malloc_global_mutex_status;
+
+/* Use spin loop to initialize global lock */
+static void init_malloc_global_mutex() {
+  for (;;) {
+    long stat = malloc_global_mutex_status;
+    if (stat > 0)
+      return;
+    /* transition to < 0 while initializing, then to > 0) */
+    if (stat == 0 &&
+        interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) {
+      InitializeCriticalSection(&malloc_global_mutex);
+      interlockedexchange(&malloc_global_mutex_status,1);
+      return;
+    }
+    SleepEx(0, FALSE);
+  }
+}
+
+#endif /* WIN32 */
+#endif /* USE_SPIN_LOCKS */
+#endif /* USE_LOCKS == 1 */
+
+/* -----------------------  User-defined locks ------------------------ */
+
+#if USE_LOCKS > 1
+/* Define your own lock implementation here */
+/* #define INITIAL_LOCK(sl)  ... */
+/* #define ACQUIRE_LOCK(sl)  ... */
+/* #define RELEASE_LOCK(sl)  ... */
+/* #define TRY_LOCK(sl) ... */
+/* static MLOCK_T malloc_global_mutex = ... */
+#endif /* USE_LOCKS > 1 */
+
+/* -----------------------  Lock-based state ------------------------ */
+
+#if USE_LOCKS
+#define USE_LOCK_BIT               (2U)
+#else  /* USE_LOCKS */
+#define USE_LOCK_BIT               (0U)
+#define INITIAL_LOCK(l)
+#endif /* USE_LOCKS */
+
+#if USE_LOCKS
+#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
+#define ACQUIRE_MALLOC_GLOBAL_LOCK()  ACQUIRE_LOCK(&malloc_global_mutex);
+#endif
+#ifndef RELEASE_MALLOC_GLOBAL_LOCK
+#define RELEASE_MALLOC_GLOBAL_LOCK()  RELEASE_LOCK(&malloc_global_mutex);
+#endif
+#else  /* USE_LOCKS */
+#define ACQUIRE_MALLOC_GLOBAL_LOCK()
+#define RELEASE_MALLOC_GLOBAL_LOCK()
+#endif /* USE_LOCKS */
+
+
+/* -----------------------  Chunk representations ------------------------ */
+
+/*
+  (The following includes lightly edited explanations by Colin Plumb.)
+
+  The malloc_chunk declaration below is misleading (but accurate and
+  necessary).  It declares a "view" into memory allowing access to
+  necessary fields at known offsets from a given base.
+
+  Chunks of memory are maintained using a `boundary tag' method as
+  originally described by Knuth.  (See the paper by Paul Wilson
+  ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
+  techniques.)  Sizes of free chunks are stored both in the front of
+  each chunk and at the end.  This makes consolidating fragmented
+  chunks into bigger chunks fast.  The head fields also hold bits
+  representing whether chunks are free or in use.
+
+  Here are some pictures to make it clearer.  They are "exploded" to
+  show that the state of a chunk can be thought of as extending from
+  the high 31 bits of the head field of its header through the
+  prev_foot and PINUSE_BIT bit of the following chunk header.
+
+  A chunk that's in use looks like:
+
+   chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+           | Size of previous chunk (if P = 0)                             |
+           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+         | Size of this chunk                                         1| +-+
+   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         |                                                               |
+         +-                                                             -+
+         |                                                               |
+         +-                                                             -+
+         |                                                               :
+         +-      size - sizeof(size_t) available payload bytes          -+
+         :                                                               |
+ chunk-> +-                                                             -+
+         |                                                               |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
+       | Size of next chunk (may or may not be in use)               | +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    And if it's free, it looks like this:
+
+   chunk-> +-                                                             -+
+           | User payload (must be in use, or we would have merged!)       |
+           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+         | Size of this chunk                                         0| +-+
+   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Next pointer                                                  |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Prev pointer                                                  |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         |                                                               :
+         +-      size - sizeof(struct chunk) unused bytes               -+
+         :                                                               |
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Size of this chunk                                            |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
+       | Size of next chunk (must be in use, or we would have merged)| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                                                               :
+       +- User payload                                                -+
+       :                                                               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+                                                                     |0|
+                                                                     +-+
+  Note that since we always merge adjacent free chunks, the chunks
+  adjacent to a free chunk must be in use.
+
+  Given a pointer to a chunk (which can be derived trivially from the
+  payload pointer) we can, in O(1) time, find out whether the adjacent
+  chunks are free, and if so, unlink them from the lists that they
+  are on and merge them with the current chunk.
+
+  Chunks always begin on even word boundaries, so the mem portion
+  (which is returned to the user) is also on an even word boundary, and
+  thus at least double-word aligned.
+
+  The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
+  chunk size (which is always a multiple of two words), is an in-use
+  bit for the *previous* chunk.  If that bit is *clear*, then the
+  word before the current chunk size contains the previous chunk
+  size, and can be used to find the front of the previous chunk.
+  The very first chunk allocated always has this bit set, preventing
+  access to non-existent (or non-owned) memory. If pinuse is set for
+  any given chunk, then you CANNOT determine the size of the
+  previous chunk, and might even get a memory addressing fault when
+  trying to do so.
+
+  The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
+  the chunk size redundantly records whether the current chunk is
+  inuse (unless the chunk is mmapped). This redundancy enables usage
+  checks within free and realloc, and reduces indirection when freeing
+  and consolidating chunks.
+
+  Each freshly allocated chunk must have both cinuse and pinuse set.
+  That is, each allocated chunk borders either a previously allocated
+  and still in-use chunk, or the base of its memory arena. This is
+  ensured by making all allocations from the the `lowest' part of any
+  found chunk.  Further, no free chunk physically borders another one,
+  so each free chunk is known to be preceded and followed by either
+  inuse chunks or the ends of memory.
+
+  Note that the `foot' of the current chunk is actually represented
+  as the prev_foot of the NEXT chunk. This makes it easier to
+  deal with alignments etc but can be very confusing when trying
+  to extend or adapt this code.
+
+  The exceptions to all this are
+
+     1. The special chunk `top' is the top-most available chunk (i.e.,
+        the one bordering the end of available memory). It is treated
+        specially.  Top is never included in any bin, is used only if
+        no other chunk is available, and is released back to the
+        system if it is very large (see M_TRIM_THRESHOLD).  In effect,
+        the top chunk is treated as larger (and thus less well
+        fitting) than any other available chunk.  The top chunk
+        doesn't update its trailing size field since there is no next
+        contiguous chunk that would have to index off it. However,
+        space is still allocated for it (TOP_FOOT_SIZE) to enable
+        separation or merging when space is extended.
+
+     3. Chunks allocated via mmap, have both cinuse and pinuse bits
+        cleared in their head fields.  Because they are allocated
+        one-by-one, each must carry its own prev_foot field, which is
+        also used to hold the offset this chunk has within its mmapped
+        region, which is needed to preserve alignment. Each mmapped
+        chunk is trailed by the first two fields of a fake next-chunk
+        for sake of usage checks.
+
+*/
+
+struct malloc_chunk {
+  size_t               prev_foot;  /* Size of previous chunk (if free).  */
+  size_t               head;       /* Size and inuse bits. */
+  struct malloc_chunk* fd;         /* double links -- used only if free. */
+  struct malloc_chunk* bk;
+};
+
+typedef struct malloc_chunk  mchunk;
+typedef struct malloc_chunk* mchunkptr;
+typedef struct malloc_chunk* sbinptr;  /* The type of bins of chunks */
+typedef unsigned int bindex_t;         /* Described below */
+typedef unsigned int binmap_t;         /* Described below */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+#define MCHUNK_SIZE         (sizeof(mchunk))
+
+#if FOOTERS
+#define CHUNK_OVERHEAD      (TWO_SIZE_T_SIZES)
+#else /* FOOTERS */
+#define CHUNK_OVERHEAD      (SIZE_T_SIZE)
+#endif /* FOOTERS */
+
+/* MMapped chunks need a second word of overhead ... */
+#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+/* ... and additional padding for fake next-chunk at foot */
+#define MMAP_FOOT_PAD       (FOUR_SIZE_T_SIZES)
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+#define MIN_CHUNK_SIZE\
+  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* conversion from malloc headers to user pointers, and back */
+#define chunk2mem(p)        ((void*)((char*)(p)       + TWO_SIZE_T_SIZES))
+#define mem2chunk(mem)      ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
+/* chunk associated with aligned address A */
+#define align_as_chunk(A)   (mchunkptr)((A) + align_offset(chunk2mem(A)))
+
+/* Bounds on request (not chunk) sizes. */
+#define MAX_REQUEST         ((-MIN_CHUNK_SIZE) << 2)
+#define MIN_REQUEST         (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+/* pad request bytes into a usable size */
+#define pad_request(req) \
+   (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* pad request, checking for minimum (but not maximum) */
+#define request2size(req) \
+  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
+
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+/*
+  The head field of a chunk is or'ed with PINUSE_BIT when previous
+  adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
+  use, unless mmapped, in which case both bits are cleared.
+
+  FLAG4_BIT is not used by this malloc, but might be useful in extensions.
+*/
+
+#define PINUSE_BIT          (SIZE_T_ONE)
+#define CINUSE_BIT          (SIZE_T_TWO)
+#define FLAG4_BIT           (SIZE_T_FOUR)
+#define INUSE_BITS          (PINUSE_BIT|CINUSE_BIT)
+#define FLAG_BITS           (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)
+
+/* Head value for fenceposts */
+#define FENCEPOST_HEAD      (INUSE_BITS|SIZE_T_SIZE)
+
+/* extraction of fields from head words */
+#define cinuse(p)           ((p)->head & CINUSE_BIT)
+#define pinuse(p)           ((p)->head & PINUSE_BIT)
+#define is_inuse(p)         (((p)->head & INUSE_BITS) != PINUSE_BIT)
+#define is_mmapped(p)       (((p)->head & INUSE_BITS) == 0)
+
+#define chunksize(p)        ((p)->head & ~(FLAG_BITS))
+
+#define clear_pinuse(p)     ((p)->head &= ~PINUSE_BIT)
+
+/* Treat space at ptr +/- offset as a chunk */
+#define chunk_plus_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
+#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
+
+/* Ptr to next or previous physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
+
+/* extract next chunk's pinuse bit */
+#define next_pinuse(p)  ((next_chunk(p)->head) & PINUSE_BIT)
+
+/* Get/set size at footer */
+#define get_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot)
+#define set_foot(p, s)  (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
+
+/* Set size, pinuse bit, and foot */
+#define set_size_and_pinuse_of_free_chunk(p, s)\
+  ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
+
+/* Set size, pinuse bit, foot, and clear next pinuse */
+#define set_free_with_pinuse(p, s, n)\
+  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+/* Get the internal overhead associated with chunk p */
+#define overhead_for(p)\
+ (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+/* Return true if malloced space is not necessarily cleared */
+#if MMAP_CLEARS
+#define calloc_must_clear(p) (!is_mmapped(p))
+#else /* MMAP_CLEARS */
+#define calloc_must_clear(p) (1)
+#endif /* MMAP_CLEARS */
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+/*
+  When chunks are not in use, they are treated as nodes of either
+  lists or trees.
+
+  "Small"  chunks are stored in circular doubly-linked lists, and look
+  like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk in list             |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk in list            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space (may be 0 bytes long)                .
+            .                                                               .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+  Larger chunks are kept in a form of bitwise digital trees (aka
+  tries) keyed on chunksizes.  Because malloc_tree_chunks are only for
+  free chunks greater than 256 bytes, their size doesn't impose any
+  constraints on user chunk sizes.  Each node looks like:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk of same size        |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk of same size       |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to left child (child[0])                  |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to right child (child[1])                 |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to parent                                 |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             bin index of this chunk                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space                                      .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+  Each tree holding treenodes is a tree of unique chunk sizes.  Chunks
+  of the same size are arranged in a circularly-linked list, with only
+  the oldest chunk (the next to be used, in our FIFO ordering)
+  actually in the tree.  (Tree members are distinguished by a non-null
+  parent pointer.)  If a chunk with the same size an an existing node
+  is inserted, it is linked off the existing node using pointers that
+  work in the same way as fd/bk pointers of small chunks.
+
+  Each tree contains a power of 2 sized range of chunk sizes (the
+  smallest is 0x100 <= x < 0x180), which is is divided in half at each
+  tree level, with the chunks in the smaller half of the range (0x100
+  <= x < 0x140 for the top nose) in the left subtree and the larger
+  half (0x140 <= x < 0x180) in the right subtree.  This is, of course,
+  done by inspecting individual bits.
+
+  Using these rules, each node's left subtree contains all smaller
+  sizes than its right subtree.  However, the node at the root of each
+  subtree has no particular ordering relationship to either.  (The
+  dividing line between the subtree sizes is based on trie relation.)
+  If we remove the last chunk of a given size from the interior of the
+  tree, we need to replace it with a leaf node.  The tree ordering
+  rules permit a node to be replaced by any leaf below it.
+
+  The smallest chunk in a tree (a common operation in a best-fit
+  allocator) can be found by walking a path to the leftmost leaf in
+  the tree.  Unlike a usual binary tree, where we follow left child
+  pointers until we reach a null, here we follow the right child
+  pointer any time the left one is null, until we reach a leaf with
+  both child pointers null. The smallest chunk in the tree will be
+  somewhere along that path.
+
+  The worst case number of steps to add, find, or remove a node is
+  bounded by the number of bits differentiating chunks within
+  bins. Under current bin calculations, this ranges from 6 up to 21
+  (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
+  is of course much better.
+*/
+
+struct malloc_tree_chunk {
+  /* The first four fields must be compatible with malloc_chunk */
+  size_t                    prev_foot;
+  size_t                    head;
+  struct malloc_tree_chunk* fd;
+  struct malloc_tree_chunk* bk;
+
+  struct malloc_tree_chunk* child[2];
+  struct malloc_tree_chunk* parent;
+  bindex_t                  index;
+};
+
+typedef struct malloc_tree_chunk  tchunk;
+typedef struct malloc_tree_chunk* tchunkptr;
+typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
+
+/* A little helper macro for trees */
+#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
+
+/* ----------------------------- Segments -------------------------------- */
+
+/*
+  Each malloc space may include non-contiguous segments, held in a
+  list headed by an embedded malloc_segment record representing the
+  top-most space. Segments also include flags holding properties of
+  the space. Large chunks that are directly allocated by mmap are not
+  included in this list. They are instead independently created and
+  destroyed without otherwise keeping track of them.
+
+  Segment management mainly comes into play for spaces allocated by
+  MMAP.  Any call to MMAP might or might not return memory that is
+  adjacent to an existing segment.  MORECORE normally contiguously
+  extends the current space, so this space is almost always adjacent,
+  which is simpler and faster to deal with. (This is why MORECORE is
+  used preferentially to MMAP when both are available -- see
+  sys_alloc.)  When allocating using MMAP, we don't use any of the
+  hinting mechanisms (inconsistently) supported in various
+  implementations of unix mmap, or distinguish reserving from
+  committing memory. Instead, we just ask for space, and exploit
+  contiguity when we get it.  It is probably possible to do
+  better than this on some systems, but no general scheme seems
+  to be significantly better.
+
+  Management entails a simpler variant of the consolidation scheme
+  used for chunks to reduce fragmentation -- new adjacent memory is
+  normally prepended or appended to an existing segment. However,
+  there are limitations compared to chunk consolidation that mostly
+  reflect the fact that segment processing is relatively infrequent
+  (occurring only when getting memory from system) and that we
+  don't expect to have huge numbers of segments:
+
+  * Segments are not indexed, so traversal requires linear scans.  (It
+    would be possible to index these, but is not worth the extra
+    overhead and complexity for most programs on most platforms.)
+  * New segments are only appended to old ones when holding top-most
+    memory; if they cannot be prepended to others, they are held in
+    different segments.
+
+  Except for the top-most segment of an mstate, each segment record
+  is kept at the tail of its segment. Segments are added by pushing
+  segment records onto the list headed by &mstate.seg for the
+  containing mstate.
+
+  Segment flags control allocation/merge/deallocation policies:
+  * If EXTERN_BIT set, then we did not allocate this segment,
+    and so should not try to deallocate or merge with others.
+    (This currently holds only for the initial segment passed
+    into create_mspace_with_base.)
+  * If USE_MMAP_BIT set, the segment may be merged with
+    other surrounding mmapped segments and trimmed/de-allocated
+    using munmap.
+  * If neither bit is set, then the segment was obtained using
+    MORECORE so can be merged with surrounding MORECORE'd segments
+    and deallocated/trimmed using MORECORE with negative arguments.
+*/
+
+struct malloc_segment {
+  char*        base;             /* base address */
+  size_t       size;             /* allocated size */
+  struct malloc_segment* next;   /* ptr to next segment */
+  flag_t       sflags;           /* mmap and extern flag */
+};
+
+#define is_mmapped_segment(S)  ((S)->sflags & USE_MMAP_BIT)
+#define is_extern_segment(S)   ((S)->sflags & EXTERN_BIT)
+
+typedef struct malloc_segment  msegment;
+typedef struct malloc_segment* msegmentptr;
+
+/* ---------------------------- malloc_state ----------------------------- */
+
+/*
+   A malloc_state holds all of the bookkeeping for a space.
+   The main fields are:
+
+  Top
+    The topmost chunk of the currently active segment. Its size is
+    cached in topsize.  The actual size of topmost space is
+    topsize+TOP_FOOT_SIZE, which includes space reserved for adding
+    fenceposts and segment records if necessary when getting more
+    space from the system.  The size at which to autotrim top is
+    cached from mparams in trim_check, except that it is disabled if
+    an autotrim fails.
+
+  Designated victim (dv)
+    This is the preferred chunk for servicing small requests that
+    don't have exact fits.  It is normally the chunk split off most
+    recently to service another small request.  Its size is cached in
+    dvsize. The link fields of this chunk are not maintained since it
+    is not kept in a bin.
+
+  SmallBins
+    An array of bin headers for free chunks.  These bins hold chunks
+    with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
+    chunks of all the same size, spaced 8 bytes apart.  To simplify
+    use in double-linked lists, each bin header acts as a malloc_chunk
+    pointing to the real first node, if it exists (else pointing to
+    itself).  This avoids special-casing for headers.  But to avoid
+    waste, we allocate only the fd/bk pointers of bins, and then use
+    repositioning tricks to treat these as the fields of a chunk.
+
+  TreeBins
+    Treebins are pointers to the roots of trees holding a range of
+    sizes. There are 2 equally spaced treebins for each power of two
+    from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
+    larger.
+
+  Bin maps
+    There is one bit map for small bins ("smallmap") and one for
+    treebins ("treemap).  Each bin sets its bit when non-empty, and
+    clears the bit when empty.  Bit operations are then used to avoid
+    bin-by-bin searching -- nearly all "search" is done without ever
+    looking at bins that won't be selected.  The bit maps
+    conservatively use 32 bits per map word, even if on 64bit system.
+    For a good description of some of the bit-based techniques used
+    here, see Henry S. Warren Jr's book "Hacker's Delight" (and
+    supplement at http://hackersdelight.org/). Many of these are
+    intended to reduce the branchiness of paths through malloc etc, as
+    well as to reduce the number of memory locations read or written.
+
+  Segments
+    A list of segments headed by an embedded malloc_segment record
+    representing the initial space.
+
+  Address check support
+    The least_addr field is the least address ever obtained from
+    MORECORE or MMAP. Attempted frees and reallocs of any address less
+    than this are trapped (unless INSECURE is defined).
+
+  Magic tag
+    A cross-check field that should always hold same value as mparams.magic.
+
+  Flags
+    Bits recording whether to use MMAP, locks, or contiguous MORECORE
+
+  Statistics
+    Each space keeps track of current and maximum system memory
+    obtained via MORECORE or MMAP.
+
+  Trim support
+    Fields holding the amount of unused topmost memory that should trigger
+    timming, and a counter to force periodic scanning to release unused
+    non-topmost segments.
+
+  Locking
+    If USE_LOCKS is defined, the "mutex" lock is acquired and released
+    around every public call using this mspace.
+
+  Extension support
+    A void* pointer and a size_t field that can be used to help implement
+    extensions to this malloc.
+*/
+
+/* Bin types, widths and sizes */
+#define NSMALLBINS        (32U)
+#define NTREEBINS         (32U)
+#define SMALLBIN_SHIFT    (3U)
+#define SMALLBIN_WIDTH    (SIZE_T_ONE << SMALLBIN_SHIFT)
+#define TREEBIN_SHIFT     (8U)
+#define MIN_LARGE_SIZE    (SIZE_T_ONE << TREEBIN_SHIFT)
+#define MAX_SMALL_SIZE    (MIN_LARGE_SIZE - SIZE_T_ONE)
+#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+
+struct malloc_state {
+  binmap_t   smallmap;
+  binmap_t   treemap;
+  size_t     dvsize;
+  size_t     topsize;
+  char*      least_addr;
+  mchunkptr  dv;
+  mchunkptr  top;
+  size_t     trim_check;
+  size_t     release_checks;
+  size_t     magic;
+  mchunkptr  smallbins[(NSMALLBINS+1)*2];
+  tbinptr    treebins[NTREEBINS];
+  size_t     footprint;
+  size_t     max_footprint;
+  flag_t     mflags;
+  msegment   seg;
+#if USE_LOCKS
+  MLOCK_T    mutex;     /* locate lock among fields that rarely change */
+#endif /* USE_LOCKS */
+  void*      extp;      /* Unused but available for extensions */
+  size_t     exts;
+};
+
+typedef struct malloc_state*    mstate;
+
+/* ------------- Global malloc_state and malloc_params ------------------- */
+
+#if !ONLY_MSPACES
+
+/* The global malloc_state used for all non-"mspace" calls */
+static struct malloc_state _gm_;
+#define gm                 (&_gm_)
+#define is_global(M)       ((M) == &_gm_)
+
+#endif /* !ONLY_MSPACES */
+
+#define is_initialized(M)  ((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* Operations on mflags */
+
+#define use_lock(M)           ((M)->mflags &   USE_LOCK_BIT)
+#define enable_lock(M)        ((M)->mflags |=  USE_LOCK_BIT)
+#define disable_lock(M)       ((M)->mflags &= ~USE_LOCK_BIT)
+
+#define use_mmap(M)           ((M)->mflags &   USE_MMAP_BIT)
+#define enable_mmap(M)        ((M)->mflags |=  USE_MMAP_BIT)
+#define disable_mmap(M)       ((M)->mflags &= ~USE_MMAP_BIT)
+
+#define use_noncontiguous(M)  ((M)->mflags &   USE_NONCONTIGUOUS_BIT)
+#define disable_contiguous(M) ((M)->mflags |=  USE_NONCONTIGUOUS_BIT)
+
+#define set_lock(M,L)\
+ ((M)->mflags = (L)?\
+  ((M)->mflags | USE_LOCK_BIT) :\
+  ((M)->mflags & ~USE_LOCK_BIT))
+
+/* page-align a size */
+#define page_align(S)\
+ (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
+
+/* granularity-align a size */
+#define granularity_align(S)\
+  (((S) + (mparams.granularity - SIZE_T_ONE))\
+   & ~(mparams.granularity - SIZE_T_ONE))
+
+
+/* For mmap, use granularity alignment on windows, else page-align */
+#ifdef WIN32
+#define mmap_align(S) granularity_align(S)
+#else
+#define mmap_align(S) page_align(S)
+#endif
+
+/* For sys_alloc, enough padding to ensure can malloc request on success */
+#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
+
+#define is_page_aligned(S)\
+   (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+#define is_granularity_aligned(S)\
+   (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+/*  True if segment S holds address A */
+#define segment_holds(S, A)\
+  ((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
+
+/* Return segment holding given address */
+static msegmentptr segment_holding(mstate m, char* addr) {
+  msegmentptr sp = &m->seg;
+  for (;;) {
+    if (addr >= sp->base && addr < sp->base + sp->size)
+      return sp;
+    if ((sp = sp->next) == 0)
+      return 0;
+  }
+}
+
+/* Return true if segment contains a segment link */
+static int has_segment_link(mstate m, msegmentptr ss) {
+  msegmentptr sp = &m->seg;
+  for (;;) {
+    if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size)
+      return 1;
+    if ((sp = sp->next) == 0)
+      return 0;
+  }
+}
+
+#ifndef MORECORE_CANNOT_TRIM
+#define should_trim(M,s)  ((s) > (M)->trim_check)
+#else  /* MORECORE_CANNOT_TRIM */
+#define should_trim(M,s)  (0)
+#endif /* MORECORE_CANNOT_TRIM */
+
+/*
+  TOP_FOOT_SIZE is padding at the end of a segment, including space
+  that may be needed to place segment records and fenceposts when new
+  noncontiguous segments are added.
+*/
+#define TOP_FOOT_SIZE\
+  (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+
+/* -------------------------------  Hooks -------------------------------- */
+
+/*
+  PREACTION should be defined to return 0 on success, and nonzero on
+  failure. If you are not using locking, you can redefine these to do
+  anything you like.
+*/
+
+#if USE_LOCKS
+
+#define PREACTION(M)  ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
+#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
+#else /* USE_LOCKS */
+
+#ifndef PREACTION
+#define PREACTION(M) (0)
+#endif  /* PREACTION */
+
+#ifndef POSTACTION
+#define POSTACTION(M)
+#endif  /* POSTACTION */
+
+#endif /* USE_LOCKS */
+
+/*
+  CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
+  USAGE_ERROR_ACTION is triggered on detected bad frees and
+  reallocs. The argument p is an address that might have triggered the
+  fault. It is ignored by the two predefined actions, but might be
+  useful in custom actions that try to help diagnose errors.
+*/
+
+#if PROCEED_ON_ERROR
+
+/* A count of the number of corruption errors causing resets */
+int malloc_corruption_error_count;
+
+/* default corruption action */
+static void reset_on_error(mstate m);
+
+#define CORRUPTION_ERROR_ACTION(m)  reset_on_error(m)
+#define USAGE_ERROR_ACTION(m, p)
+
+#else /* PROCEED_ON_ERROR */
+
+#ifndef CORRUPTION_ERROR_ACTION
+#define CORRUPTION_ERROR_ACTION(m) ABORT
+#endif /* CORRUPTION_ERROR_ACTION */
+
+#ifndef USAGE_ERROR_ACTION
+#define USAGE_ERROR_ACTION(m,p) ABORT
+#endif /* USAGE_ERROR_ACTION */
+
+#endif /* PROCEED_ON_ERROR */
+
+/* -------------------------- Debugging setup ---------------------------- */
+
+#if ! DEBUG
+
+#define check_free_chunk(M,P)
+#define check_inuse_chunk(M,P)
+#define check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P)
+#define check_malloc_state(M)
+#define check_top_chunk(M,P)
+
+#else /* DEBUG */
+#define check_free_chunk(M,P)       do_check_free_chunk(M,P)
+#define check_inuse_chunk(M,P)      do_check_inuse_chunk(M,P)
+#define check_top_chunk(M,P)        do_check_top_chunk(M,P)
+#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
+#define check_mmapped_chunk(M,P)    do_check_mmapped_chunk(M,P)
+#define check_malloc_state(M)       do_check_malloc_state(M)
+
+static void   do_check_any_chunk(mstate m, mchunkptr p);
+static void   do_check_top_chunk(mstate m, mchunkptr p);
+static void   do_check_mmapped_chunk(mstate m, mchunkptr p);
+static void   do_check_inuse_chunk(mstate m, mchunkptr p);
+static void   do_check_free_chunk(mstate m, mchunkptr p);
+static void   do_check_malloced_chunk(mstate m, void* mem, size_t s);
+static void   do_check_tree(mstate m, tchunkptr t);
+static void   do_check_treebin(mstate m, bindex_t i);
+static void   do_check_smallbin(mstate m, bindex_t i);
+static void   do_check_malloc_state(mstate m);
+static int    bin_find(mstate m, mchunkptr x);
+static size_t traverse_and_check(mstate m);
+#endif /* DEBUG */
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+#define is_small(s)         (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+#define small_index(s)      (bindex_t)((s)  >> SMALLBIN_SHIFT)
+#define small_index2size(i) ((i)  << SMALLBIN_SHIFT)
+#define MIN_SMALL_INDEX     (small_index(MIN_CHUNK_SIZE))
+
+/* addressing by index. See above about smallbin repositioning */
+#define smallbin_at(M, i)   ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
+#define treebin_at(M,i)     (&((M)->treebins[i]))
+
+/* assign tree index for size S to variable I. Use x86 asm if possible  */
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#define compute_tree_index(S, I)\
+{\
+  unsigned int X = S >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int K;\
+    __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g"  (X));\
+    I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+#elif defined (__INTEL_COMPILER)
+#define compute_tree_index(S, I)\
+{\
+  size_t X = S >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int K = _bit_scan_reverse (X); \
+    I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+#define compute_tree_index(S, I)\
+{\
+  size_t X = S >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int K;\
+    _BitScanReverse((DWORD *) &K, (DWORD) X);\
+    I =  (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+  }\
+}
+
+#else /* GNUC */
+#define compute_tree_index(S, I)\
+{\
+  size_t X = S >> TREEBIN_SHIFT;\
+  if (X == 0)\
+    I = 0;\
+  else if (X > 0xFFFF)\
+    I = NTREEBINS-1;\
+  else {\
+    unsigned int Y = (unsigned int)X;\
+    unsigned int N = ((Y - 0x100) >> 16) & 8;\
+    unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
+    N += K;\
+    N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
+    K = 14 - N + ((Y <<= K) >> 15);\
+    I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\
+  }\
+}
+#endif /* GNUC */
+
+/* Bit representing maximum resolved size in a treebin at i */
+#define bit_for_tree_index(i) \
+   (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
+
+/* Shift placing maximum resolved bit in a treebin at i as sign bit */
+#define leftshift_for_tree_index(i) \
+   ((i == NTREEBINS-1)? 0 : \
+    ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+/* The size of the smallest chunk held in bin with index i */
+#define minsize_for_tree_index(i) \
+   ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
+   (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
+
+
+/* ------------------------ Operations on bin maps ----------------------- */
+
+/* bit corresponding to given index */
+#define idx2bit(i)              ((binmap_t)(1) << (i))
+
+/* Mark/Clear bits with given index */
+#define mark_smallmap(M,i)      ((M)->smallmap |=  idx2bit(i))
+#define clear_smallmap(M,i)     ((M)->smallmap &= ~idx2bit(i))
+#define smallmap_is_marked(M,i) ((M)->smallmap &   idx2bit(i))
+
+#define mark_treemap(M,i)       ((M)->treemap  |=  idx2bit(i))
+#define clear_treemap(M,i)      ((M)->treemap  &= ~idx2bit(i))
+#define treemap_is_marked(M,i)  ((M)->treemap  &   idx2bit(i))
+
+/* isolate the least set bit of a bitmap */
+#define least_bit(x)         ((x) & -(x))
+
+/* mask with all bits to left of least bit of x on */
+#define left_bits(x)         ((x<<1) | -(x<<1))
+
+/* mask with all bits to left of or equal to least bit of x on */
+#define same_or_left_bits(x) ((x) | -(x))
+
+/* index corresponding to given bit. Use x86 asm if possible */
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int J;\
+  __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\
+  I = (bindex_t)J;\
+}
+
+#elif defined (__INTEL_COMPILER)
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int J;\
+  J = _bit_scan_forward (X); \
+  I = (bindex_t)J;\
+}
+
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int J;\
+  _BitScanForward((DWORD *) &J, X);\
+  I = (bindex_t)J;\
+}
+
+#elif USE_BUILTIN_FFS
+#define compute_bit2idx(X, I) I = ffs(X)-1
+
+#else
+#define compute_bit2idx(X, I)\
+{\
+  unsigned int Y = X - 1;\
+  unsigned int K = Y >> (16-4) & 16;\
+  unsigned int N = K;        Y >>= K;\
+  N += K = Y >> (8-3) &  8;  Y >>= K;\
+  N += K = Y >> (4-2) &  4;  Y >>= K;\
+  N += K = Y >> (2-1) &  2;  Y >>= K;\
+  N += K = Y >> (1-0) &  1;  Y >>= K;\
+  I = (bindex_t)(N + Y);\
+}
+#endif /* GNUC */
+
+
+/* ----------------------- Runtime Check Support ------------------------- */
+
+/*
+  For security, the main invariant is that malloc/free/etc never
+  writes to a static address other than malloc_state, unless static
+  malloc_state itself has been corrupted, which cannot occur via
+  malloc (because of these checks). In essence this means that we
+  believe all pointers, sizes, maps etc held in malloc_state, but
+  check all of those linked or offsetted from other embedded data
+  structures.  These checks are interspersed with main code in a way
+  that tends to minimize their run-time cost.
+
+  When FOOTERS is defined, in addition to range checking, we also
+  verify footer fields of inuse chunks, which can be used guarantee
+  that the mstate controlling malloc/free is intact.  This is a
+  streamlined version of the approach described by William Robertson
+  et al in "Run-time Detection of Heap-based Overflows" LISA'03
+  http://www.usenix.org/events/lisa03/tech/robertson.html The footer
+  of an inuse chunk holds the xor of its mstate and a random seed,
+  that is checked upon calls to free() and realloc().  This is
+  (probablistically) unguessable from outside the program, but can be
+  computed by any code successfully malloc'ing any chunk, so does not
+  itself provide protection against code that has already broken
+  security through some other means.  Unlike Robertson et al, we
+  always dynamically check addresses of all offset chunks (previous,
+  next, etc). This turns out to be cheaper than relying on hashes.
+*/
+
+#if !INSECURE
+/* Check if address a is at least as high as any from MORECORE or MMAP */
+#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
+/* Check if address of next chunk n is higher than base chunk p */
+#define ok_next(p, n)    ((char*)(p) < (char*)(n))
+/* Check if p has inuse status */
+#define ok_inuse(p)     is_inuse(p)
+/* Check if p has its pinuse bit on */
+#define ok_pinuse(p)     pinuse(p)
+
+#else /* !INSECURE */
+#define ok_address(M, a) (1)
+#define ok_next(b, n)    (1)
+#define ok_inuse(p)      (1)
+#define ok_pinuse(p)     (1)
+#endif /* !INSECURE */
+
+#if (FOOTERS && !INSECURE)
+/* Check if (alleged) mstate m has expected magic field */
+#define ok_magic(M)      ((M)->magic == mparams.magic)
+#else  /* (FOOTERS && !INSECURE) */
+#define ok_magic(M)      (1)
+#endif /* (FOOTERS && !INSECURE) */
+
+
+/* In gcc, use __builtin_expect to minimize impact of checks */
+#if !INSECURE
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define RTCHECK(e)  __builtin_expect(e, 1)
+#else /* GNUC */
+#define RTCHECK(e)  (e)
+#endif /* GNUC */
+#else /* !INSECURE */
+#define RTCHECK(e)  (1)
+#endif /* !INSECURE */
+
+/* macros to set up inuse chunks with or without footers */
+
+#if !FOOTERS
+
+#define mark_inuse_foot(M,p,s)
+
+/* Macros for setting head/foot of non-mmapped chunks */
+
+/* Set cinuse bit and pinuse bit of next chunk */
+#define set_inuse(M,p,s)\
+  ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+#define set_inuse_and_pinuse(M,p,s)\
+  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+  ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse bit of this chunk */
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
+
+#else /* FOOTERS */
+
+/* Set foot of inuse chunk to be xor of mstate and seed */
+#define mark_inuse_foot(M,p,s)\
+  (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
+
+#define get_mstate_for(p)\
+  ((mstate)(((mchunkptr)((char*)(p) +\
+    (chunksize(p))))->prev_foot ^ mparams.magic))
+
+#define set_inuse(M,p,s)\
+  ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
+  mark_inuse_foot(M,p,s))
+
+#define set_inuse_and_pinuse(M,p,s)\
+  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+  (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
+ mark_inuse_foot(M,p,s))
+
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+  mark_inuse_foot(M, p, s))
+
+#endif /* !FOOTERS */
+
+/* ---------------------------- setting mparams -------------------------- */
+
+#ifdef ENABLE_LARGE_PAGES
+typedef size_t (WINAPI *GetLargePageMinimum_t)(void);
+#endif
+
+/* Initialize mparams */
+static int init_mparams(void) {
+#ifdef NEED_GLOBAL_LOCK_INIT
+  if (malloc_global_mutex_status <= 0)
+    init_malloc_global_mutex();
+#endif
+
+  ACQUIRE_MALLOC_GLOBAL_LOCK();
+  if (mparams.magic == 0) {
+    size_t magic;
+    size_t psize;
+    size_t gsize;
+
+#ifndef WIN32
+    psize = malloc_getpagesize;
+    gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize);
+#else /* WIN32 */
+    {
+      SYSTEM_INFO system_info;
+      GetSystemInfo(&system_info);
+      psize = system_info.dwPageSize;
+      gsize = ((DEFAULT_GRANULARITY != 0)?
+               DEFAULT_GRANULARITY : system_info.dwAllocationGranularity);
+#ifdef ENABLE_LARGE_PAGES
+      { 
+          GetLargePageMinimum_t GetLargePageMinimum_ = (GetLargePageMinimum_t) GetProcAddress(GetModuleHandle(__T("kernel32.dll")), "GetLargePageMinimum");
+          if(GetLargePageMinimum_) {
+              size_t largepagesize = GetLargePageMinimum_();
+              if(largepagesize) {
+                  psize = largepagesize;
+                  gsize = ((DEFAULT_GRANULARITY != 0)?
+                           DEFAULT_GRANULARITY : largepagesize);
+                  if(gsize < largepagesize) gsize = largepagesize;
+              }
+          }
+      }
+#endif
+    }
+#endif /* WIN32 */
+
+    /* Sanity-check configuration:
+       size_t must be unsigned and as wide as pointer type.
+       ints must be at least 4 bytes.
+       alignment must be at least 8.
+       Alignment, min chunk size, and page size must all be powers of 2.
+    */
+    if ((sizeof(size_t) != sizeof(char*)) ||
+        (MAX_SIZE_T < MIN_CHUNK_SIZE)  ||
+        (sizeof(int) < 4)  ||
+        (MALLOC_ALIGNMENT < (size_t)8U) ||
+        ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
+        ((MCHUNK_SIZE      & (MCHUNK_SIZE-SIZE_T_ONE))      != 0) ||
+        ((gsize            & (gsize-SIZE_T_ONE))            != 0) ||
+        ((psize            & (psize-SIZE_T_ONE))            != 0))
+      ABORT;
+
+    mparams.granularity = gsize;
+    mparams.page_size = psize;
+    mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+    mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
+#if MORECORE_CONTIGUOUS
+    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
+#else  /* MORECORE_CONTIGUOUS */
+    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
+#endif /* MORECORE_CONTIGUOUS */
+
+#if !ONLY_MSPACES
+    /* Set up lock for main malloc area */
+    gm->mflags = mparams.default_mflags;
+    INITIAL_LOCK(&gm->mutex);
+#endif
+
+    {
+#if USE_DEV_RANDOM
+      int fd;
+      unsigned char buf[sizeof(size_t)];
+      /* Try to use /dev/urandom, else fall back on using time */
+      if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 &&
+          read(fd, buf, sizeof(buf)) == sizeof(buf)) {
+        magic = *((size_t *) buf);
+        close(fd);
+      }
+      else
+#endif /* USE_DEV_RANDOM */
+#ifdef WIN32
+        magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
+#else
+        magic = (size_t)(time(0) ^ (size_t)0x55555555U);
+#endif
+      magic |= (size_t)8U;    /* ensure nonzero */
+      magic &= ~(size_t)7U;   /* improve chances of fault for bad values */
+      mparams.magic = magic;
+    }
+  }
+
+  RELEASE_MALLOC_GLOBAL_LOCK();
+  return 1;
+}
+
+/* support for mallopt */
+static int change_mparam(int param_number, int value) {
+  size_t val;
+  ensure_initialization();
+  val = (value == -1)? MAX_SIZE_T : (size_t)value;
+  switch(param_number) {
+  case M_TRIM_THRESHOLD:
+    mparams.trim_threshold = val;
+    return 1;
+  case M_GRANULARITY:
+    if (val >= mparams.page_size && ((val & (val-1)) == 0)) {
+      mparams.granularity = val;
+      return 1;
+    }
+    else
+      return 0;
+  case M_MMAP_THRESHOLD:
+    mparams.mmap_threshold = val;
+    return 1;
+  default:
+    return 0;
+  }
+}
+
+#if DEBUG
+/* ------------------------- Debugging Support --------------------------- */
+
+/* Check properties of any chunk, whether free, inuse, mmapped etc  */
+static void do_check_any_chunk(mstate m, mchunkptr p) {
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+}
+
+/* Check properties of top chunk */
+static void do_check_top_chunk(mstate m, mchunkptr p) {
+  msegmentptr sp = segment_holding(m, (char*)p);
+  size_t  sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */
+  assert(sp != 0);
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+  assert(sz == m->topsize);
+  assert(sz > 0);
+  assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
+  assert(pinuse(p));
+  assert(!pinuse(chunk_plus_offset(p, sz)));
+}
+
+/* Check properties of (inuse) mmapped chunks */
+static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
+  size_t  sz = chunksize(p);
+  size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD);
+  assert(is_mmapped(p));
+  assert(use_mmap(m));
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+  assert(!is_small(sz));
+  assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
+  assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
+  assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
+}
+
+/* Check properties of inuse chunks */
+static void do_check_inuse_chunk(mstate m, mchunkptr p) {
+  do_check_any_chunk(m, p);
+  assert(is_inuse(p));
+  assert(next_pinuse(p));
+  /* If not pinuse and not mmapped, previous chunk has OK offset */
+  assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
+  if (is_mmapped(p))
+    do_check_mmapped_chunk(m, p);
+}
+
+/* Check properties of free chunks */
+static void do_check_free_chunk(mstate m, mchunkptr p) {
+  size_t sz = chunksize(p);
+  mchunkptr next = chunk_plus_offset(p, sz);
+  do_check_any_chunk(m, p);
+  assert(!is_inuse(p));
+  assert(!next_pinuse(p));
+  assert (!is_mmapped(p));
+  if (p != m->dv && p != m->top) {
+    if (sz >= MIN_CHUNK_SIZE) {
+      assert((sz & CHUNK_ALIGN_MASK) == 0);
+      assert(is_aligned(chunk2mem(p)));
+      assert(next->prev_foot == sz);
+      assert(pinuse(p));
+      assert (next == m->top || is_inuse(next));
+      assert(p->fd->bk == p);
+      assert(p->bk->fd == p);
+    }
+    else  /* markers are always of size SIZE_T_SIZE */
+      assert(sz == SIZE_T_SIZE);
+  }
+}
+
+/* Check properties of malloced chunks at the point they are malloced */
+static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
+  if (mem != 0) {
+    mchunkptr p = mem2chunk(mem);
+    size_t sz = p->head & ~INUSE_BITS;
+    do_check_inuse_chunk(m, p);
+    assert((sz & CHUNK_ALIGN_MASK) == 0);
+    assert(sz >= MIN_CHUNK_SIZE);
+    assert(sz >= s);
+    /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
+    assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
+  }
+}
+
+/* Check a tree and its subtrees.  */
+static void do_check_tree(mstate m, tchunkptr t) {
+  tchunkptr head = 0;
+  tchunkptr u = t;
+  bindex_t tindex = t->index;
+  size_t tsize = chunksize(t);
+  bindex_t idx;
+  compute_tree_index(tsize, idx);
+  assert(tindex == idx);
+  assert(tsize >= MIN_LARGE_SIZE);
+  assert(tsize >= minsize_for_tree_index(idx));
+  assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
+
+  do { /* traverse through chain of same-sized nodes */
+    do_check_any_chunk(m, ((mchunkptr)u));
+    assert(u->index == tindex);
+    assert(chunksize(u) == tsize);
+    assert(!is_inuse(u));
+    assert(!next_pinuse(u));
+    assert(u->fd->bk == u);
+    assert(u->bk->fd == u);
+    if (u->parent == 0) {
+      assert(u->child[0] == 0);
+      assert(u->child[1] == 0);
+    }
+    else {
+      assert(head == 0); /* only one node on chain has parent */
+      head = u;
+      assert(u->parent != u);
+      assert (u->parent->child[0] == u ||
+              u->parent->child[1] == u ||
+              *((tbinptr*)(u->parent)) == u);
+      if (u->child[0] != 0) {
+        assert(u->child[0]->parent == u);
+        assert(u->child[0] != u);
+        do_check_tree(m, u->child[0]);
+      }
+      if (u->child[1] != 0) {
+        assert(u->child[1]->parent == u);
+        assert(u->child[1] != u);
+        do_check_tree(m, u->child[1]);
+      }
+      if (u->child[0] != 0 && u->child[1] != 0) {
+        assert(chunksize(u->child[0]) < chunksize(u->child[1]));
+      }
+    }
+    u = u->fd;
+  } while (u != t);
+  assert(head != 0);
+}
+
+/*  Check all the chunks in a treebin.  */
+static void do_check_treebin(mstate m, bindex_t i) {
+  tbinptr* tb = treebin_at(m, i);
+  tchunkptr t = *tb;
+  int empty = (m->treemap & (1U << i)) == 0;
+  if (t == 0)
+    assert(empty);
+  if (!empty)
+    do_check_tree(m, t);
+}
+
+/*  Check all the chunks in a smallbin.  */
+static void do_check_smallbin(mstate m, bindex_t i) {
+  sbinptr b = smallbin_at(m, i);
+  mchunkptr p = b->bk;
+  unsigned int empty = (m->smallmap & (1U << i)) == 0;
+  if (p == b)
+    assert(empty);
+  if (!empty) {
+    for (; p != b; p = p->bk) {
+      size_t size = chunksize(p);
+      mchunkptr q;
+      /* each chunk claims to be free */
+      do_check_free_chunk(m, p);
+      /* chunk belongs in bin */
+      assert(small_index(size) == i);
+      assert(p->bk == b || chunksize(p->bk) == chunksize(p));
+      /* chunk is followed by an inuse chunk */
+      q = next_chunk(p);
+      if (q->head != FENCEPOST_HEAD)
+        do_check_inuse_chunk(m, q);
+    }
+  }
+}
+
+/* Find x in a bin. Used in other check functions. */
+static int bin_find(mstate m, mchunkptr x) {
+  size_t size = chunksize(x);
+  if (is_small(size)) {
+    bindex_t sidx = small_index(size);
+    sbinptr b = smallbin_at(m, sidx);
+    if (smallmap_is_marked(m, sidx)) {
+      mchunkptr p = b;
+      do {
+        if (p == x)
+          return 1;
+      } while ((p = p->fd) != b);
+    }
+  }
+  else {
+    bindex_t tidx;
+    compute_tree_index(size, tidx);
+    if (treemap_is_marked(m, tidx)) {
+      tchunkptr t = *treebin_at(m, tidx);
+      size_t sizebits = size << leftshift_for_tree_index(tidx);
+      while (t != 0 && chunksize(t) != size) {
+        t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+        sizebits <<= 1;
+      }
+      if (t != 0) {
+        tchunkptr u = t;
+        do {
+          if (u == (tchunkptr)x)
+            return 1;
+        } while ((u = u->fd) != t);
+      }
+    }
+  }
+  return 0;
+}
+
+/* Traverse each chunk and check it; return total */
+static size_t traverse_and_check(mstate m) {
+  size_t sum = 0;
+  if (is_initialized(m)) {
+    msegmentptr s = &m->seg;
+    sum += m->topsize + TOP_FOOT_SIZE;
+    while (s != 0) {
+      mchunkptr q = align_as_chunk(s->base);
+      mchunkptr lastq = 0;
+      assert(pinuse(q));
+      while (segment_holds(s, q) &&
+             q != m->top && q->head != FENCEPOST_HEAD) {
+        sum += chunksize(q);
+        if (is_inuse(q)) {
+          assert(!bin_find(m, q));
+          do_check_inuse_chunk(m, q);
+        }
+        else {
+          assert(q == m->dv || bin_find(m, q));
+          assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */
+          do_check_free_chunk(m, q);
+        }
+        lastq = q;
+        q = next_chunk(q);
+      }
+      s = s->next;
+    }
+  }
+  return sum;
+}
+
+/* Check all properties of malloc_state. */
+static void do_check_malloc_state(mstate m) {
+  bindex_t i;
+  size_t total;
+  /* check bins */
+  for (i = 0; i < NSMALLBINS; ++i)
+    do_check_smallbin(m, i);
+  for (i = 0; i < NTREEBINS; ++i)
+    do_check_treebin(m, i);
+
+  if (m->dvsize != 0) { /* check dv chunk */
+    do_check_any_chunk(m, m->dv);
+    assert(m->dvsize == chunksize(m->dv));
+    assert(m->dvsize >= MIN_CHUNK_SIZE);
+    assert(bin_find(m, m->dv) == 0);
+  }
+
+  if (m->top != 0) {   /* check top chunk */
+    do_check_top_chunk(m, m->top);
+    /*assert(m->topsize == chunksize(m->top)); redundant */
+    assert(m->topsize > 0);
+    assert(bin_find(m, m->top) == 0);
+  }
+
+  total = traverse_and_check(m);
+  assert(total <= m->footprint);
+  assert(m->footprint <= m->max_footprint);
+}
+#endif /* DEBUG */
+
+/* ----------------------------- statistics ------------------------------ */
+
+#if !NO_MALLINFO
+static struct mallinfo internal_mallinfo(mstate m) {
+  struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  ensure_initialization();
+  if (!PREACTION(m)) {
+    check_malloc_state(m);
+    if (is_initialized(m)) {
+      size_t nfree = SIZE_T_ONE; /* top always free */
+      size_t mfree = m->topsize + TOP_FOOT_SIZE;
+      size_t sum = mfree;
+      msegmentptr s = &m->seg;
+      while (s != 0) {
+        mchunkptr q = align_as_chunk(s->base);
+        while (segment_holds(s, q) &&
+               q != m->top && q->head != FENCEPOST_HEAD) {
+          size_t sz = chunksize(q);
+          sum += sz;
+          if (!is_inuse(q)) {
+            mfree += sz;
+            ++nfree;
+          }
+          q = next_chunk(q);
+        }
+        s = s->next;
+      }
+
+      nm.arena    = sum;
+      nm.ordblks  = nfree;
+      nm.hblkhd   = m->footprint - sum;
+      nm.usmblks  = m->max_footprint;
+      nm.uordblks = m->footprint - mfree;
+      nm.fordblks = mfree;
+      nm.keepcost = m->topsize;
+    }
+
+    POSTACTION(m);
+  }
+  return nm;
+}
+#endif /* !NO_MALLINFO */
+
+static void internal_malloc_stats(mstate m) {
+  ensure_initialization();
+  if (!PREACTION(m)) {
+    size_t maxfp = 0;
+    size_t fp = 0;
+    size_t used = 0;
+    check_malloc_state(m);
+    if (is_initialized(m)) {
+      msegmentptr s = &m->seg;
+      maxfp = m->max_footprint;
+      fp = m->footprint;
+      used = fp - (m->topsize + TOP_FOOT_SIZE);
+
+      while (s != 0) {
+        mchunkptr q = align_as_chunk(s->base);
+        while (segment_holds(s, q) &&
+               q != m->top && q->head != FENCEPOST_HEAD) {
+          if (!is_inuse(q))
+            used -= chunksize(q);
+          q = next_chunk(q);
+        }
+        s = s->next;
+      }
+    }
+
+    fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
+    fprintf(stderr, "system bytes     = %10lu\n", (unsigned long)(fp));
+    fprintf(stderr, "in use bytes     = %10lu\n", (unsigned long)(used));
+
+    POSTACTION(m);
+  }
+}
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/*
+  Various forms of linking and unlinking are defined as macros.  Even
+  the ones for trees, which are very long but have very short typical
+  paths.  This is ugly but reduces reliance on inlining support of
+  compilers.
+*/
+
+/* Link a free chunk into a smallbin  */
+#define insert_small_chunk(M, P, S) {\
+  bindex_t I  = small_index(S);\
+  mchunkptr B = smallbin_at(M, I);\
+  mchunkptr F = B;\
+  assert(S >= MIN_CHUNK_SIZE);\
+  if (!smallmap_is_marked(M, I))\
+    mark_smallmap(M, I);\
+  else if (RTCHECK(ok_address(M, B->fd)))\
+    F = B->fd;\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+  B->fd = P;\
+  F->bk = P;\
+  P->fd = F;\
+  P->bk = B;\
+}
+
+/* Unlink a chunk from a smallbin  */
+#define unlink_small_chunk(M, P, S) {\
+  mchunkptr F = P->fd;\
+  mchunkptr B = P->bk;\
+  bindex_t I = small_index(S);\
+  assert(P != B);\
+  assert(P != F);\
+  assert(chunksize(P) == small_index2size(I));\
+  if (F == B)\
+    clear_smallmap(M, I);\
+  else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
+                   (B == smallbin_at(M,I) || ok_address(M, B)))) {\
+    F->bk = B;\
+    B->fd = F;\
+  }\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+}
+
+/* Unlink the first chunk from a smallbin */
+#define unlink_first_small_chunk(M, B, P, I) {\
+  mchunkptr F = P->fd;\
+  assert(P != B);\
+  assert(P != F);\
+  assert(chunksize(P) == small_index2size(I));\
+  if (B == F)\
+    clear_smallmap(M, I);\
+  else if (RTCHECK(ok_address(M, F))) {\
+    B->fd = F;\
+    F->bk = B;\
+  }\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+}
+
+
+
+/* Replace dv node, binning the old one */
+/* Used only when dvsize known to be small */
+#define replace_dv(M, P, S) {\
+  size_t DVS = M->dvsize;\
+  if (DVS != 0) {\
+    mchunkptr DV = M->dv;\
+    assert(is_small(DVS));\
+    insert_small_chunk(M, DV, DVS);\
+  }\
+  M->dvsize = S;\
+  M->dv = P;\
+}
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk into tree */
+#define insert_large_chunk(M, X, S) {\
+  tbinptr* H;\
+  bindex_t I;\
+  compute_tree_index(S, I);\
+  H = treebin_at(M, I);\
+  X->index = I;\
+  X->child[0] = X->child[1] = 0;\
+  if (!treemap_is_marked(M, I)) {\
+    mark_treemap(M, I);\
+    *H = X;\
+    X->parent = (tchunkptr)H;\
+    X->fd = X->bk = X;\
+  }\
+  else {\
+    tchunkptr T = *H;\
+    size_t K = S << leftshift_for_tree_index(I);\
+    for (;;) {\
+      if (chunksize(T) != S) {\
+        tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+        K <<= 1;\
+        if (*C != 0)\
+          T = *C;\
+        else if (RTCHECK(ok_address(M, C))) {\
+          *C = X;\
+          X->parent = T;\
+          X->fd = X->bk = X;\
+          break;\
+        }\
+        else {\
+          CORRUPTION_ERROR_ACTION(M);\
+          break;\
+        }\
+      }\
+      else {\
+        tchunkptr F = T->fd;\
+        if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
+          T->fd = F->bk = X;\
+          X->fd = F;\
+          X->bk = T;\
+          X->parent = 0;\
+          break;\
+        }\
+        else {\
+          CORRUPTION_ERROR_ACTION(M);\
+          break;\
+        }\
+      }\
+    }\
+  }\
+}
+
+/*
+  Unlink steps:
+
+  1. If x is a chained node, unlink it from its same-sized fd/bk links
+     and choose its bk node as its replacement.
+  2. If x was the last node of its size, but not a leaf node, it must
+     be replaced with a leaf node (not merely one with an open left or
+     right), to make sure that lefts and rights of descendents
+     correspond properly to bit masks.  We use the rightmost descendent
+     of x.  We could use any other leaf, but this is easy to locate and
+     tends to counteract removal of leftmosts elsewhere, and so keeps
+     paths shorter than minimally guaranteed.  This doesn't loop much
+     because on average a node in a tree is near the bottom.
+  3. If x is the base of a chain (i.e., has parent links) relink
+     x's parent and children to x's replacement (or null if none).
+*/
+
+#define unlink_large_chunk(M, X) {\
+  tchunkptr XP = X->parent;\
+  tchunkptr R;\
+  if (X->bk != X) {\
+    tchunkptr F = X->fd;\
+    R = X->bk;\
+    if (RTCHECK(ok_address(M, F))) {\
+      F->bk = R;\
+      R->fd = F;\
+    }\
+    else {\
+      CORRUPTION_ERROR_ACTION(M);\
+    }\
+  }\
+  else {\
+    tchunkptr* RP;\
+    if (((R = *(RP = &(X->child[1]))) != 0) ||\
+        ((R = *(RP = &(X->child[0]))) != 0)) {\
+      tchunkptr* CP;\
+      while ((*(CP = &(R->child[1])) != 0) ||\
+             (*(CP = &(R->child[0])) != 0)) {\
+        R = *(RP = CP);\
+      }\
+      if (RTCHECK(ok_address(M, RP)))\
+        *RP = 0;\
+      else {\
+        CORRUPTION_ERROR_ACTION(M);\
+      }\
+    }\
+  }\
+  if (XP != 0) {\
+    tbinptr* H = treebin_at(M, X->index);\
+    if (X == *H) {\
+      if ((*H = R) == 0) \
+        clear_treemap(M, X->index);\
+    }\
+    else if (RTCHECK(ok_address(M, XP))) {\
+      if (XP->child[0] == X) \
+        XP->child[0] = R;\
+      else \
+        XP->child[1] = R;\
+    }\
+    else\
+      CORRUPTION_ERROR_ACTION(M);\
+    if (R != 0) {\
+      if (RTCHECK(ok_address(M, R))) {\
+        tchunkptr C0, C1;\
+        R->parent = XP;\
+        if ((C0 = X->child[0]) != 0) {\
+          if (RTCHECK(ok_address(M, C0))) {\
+            R->child[0] = C0;\
+            C0->parent = R;\
+          }\
+          else\
+            CORRUPTION_ERROR_ACTION(M);\
+        }\
+        if ((C1 = X->child[1]) != 0) {\
+          if (RTCHECK(ok_address(M, C1))) {\
+            R->child[1] = C1;\
+            C1->parent = R;\
+          }\
+          else\
+            CORRUPTION_ERROR_ACTION(M);\
+        }\
+      }\
+      else\
+        CORRUPTION_ERROR_ACTION(M);\
+    }\
+  }\
+}
+
+/* Relays to large vs small bin operations */
+
+#define insert_chunk(M, P, S)\
+  if (is_small(S)) insert_small_chunk(M, P, S)\
+  else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
+
+#define unlink_chunk(M, P, S)\
+  if (is_small(S)) unlink_small_chunk(M, P, S)\
+  else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
+
+
+/* Relays to internal calls to malloc/free from realloc, memalign etc */
+
+#if ONLY_MSPACES
+#define internal_malloc(m, b) mspace_malloc(m, b)
+#define internal_free(m, mem) mspace_free(m,mem);
+#else /* ONLY_MSPACES */
+#if MSPACES
+#define internal_malloc(m, b)\
+   (m == gm)? dlmalloc(b) : mspace_malloc(m, b)
+#define internal_free(m, mem)\
+   if (m == gm) dlfree(mem); else mspace_free(m,mem);
+#else /* MSPACES */
+#define internal_malloc(m, b) dlmalloc(b)
+#define internal_free(m, mem) dlfree(mem)
+#endif /* MSPACES */
+#endif /* ONLY_MSPACES */
+
+/* -----------------------  Direct-mmapping chunks ----------------------- */
+
+/*
+  Directly mmapped chunks are set up with an offset to the start of
+  the mmapped region stored in the prev_foot field of the chunk. This
+  allows reconstruction of the required argument to MUNMAP when freed,
+  and also allows adjustment of the returned chunk to meet alignment
+  requirements (especially in memalign).
+*/
+
+/* Malloc using mmap */
+static void* mmap_alloc(mstate m, size_t nb) {
+  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  if (mmsize > nb) {     /* Check for wrap around 0 */
+    char* mm = (char*)(CALL_DIRECT_MMAP(mmsize));
+    if (mm != CMFAIL) {
+      size_t offset = align_offset(chunk2mem(mm));
+      size_t psize = mmsize - offset - MMAP_FOOT_PAD;
+      mchunkptr p = (mchunkptr)(mm + offset);
+      p->prev_foot = offset;
+      p->head = psize;
+      mark_inuse_foot(m, p, psize);
+      chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
+
+      if (m->least_addr == 0 || mm < m->least_addr)
+        m->least_addr = mm;
+      if ((m->footprint += mmsize) > m->max_footprint)
+        m->max_footprint = m->footprint;
+      assert(is_aligned(chunk2mem(p)));
+      check_mmapped_chunk(m, p);
+      return chunk2mem(p);
+    }
+  }
+  return 0;
+}
+
+/* Realloc using mmap */
+static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) {
+  size_t oldsize = chunksize(oldp);
+  if (is_small(nb)) /* Can't shrink mmap regions below small size */
+    return 0;
+  /* Keep old chunk if big enough but not too big */
+  if (oldsize >= nb + SIZE_T_SIZE &&
+      (oldsize - nb) <= (mparams.granularity << 1))
+    return oldp;
+  else {
+    size_t offset = oldp->prev_foot;
+    size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
+    size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+    char* cp = (char*)CALL_MREMAP((char*)oldp - offset,
+                                  oldmmsize, newmmsize, 1);
+    if (cp != CMFAIL) {
+      mchunkptr newp = (mchunkptr)(cp + offset);
+      size_t psize = newmmsize - offset - MMAP_FOOT_PAD;
+      newp->head = psize;
+      mark_inuse_foot(m, newp, psize);
+      chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
+
+      if (cp < m->least_addr)
+        m->least_addr = cp;
+      if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
+        m->max_footprint = m->footprint;
+      check_mmapped_chunk(m, newp);
+      return newp;
+    }
+  }
+  return 0;
+}
+
+/* -------------------------- mspace management -------------------------- */
+
+/* Initialize top chunk and its size */
+static void init_top(mstate m, mchunkptr p, size_t psize) {
+  /* Ensure alignment */
+  size_t offset = align_offset(chunk2mem(p));
+  p = (mchunkptr)((char*)p + offset);
+  psize -= offset;
+
+  m->top = p;
+  m->topsize = psize;
+  p->head = psize | PINUSE_BIT;
+  /* set size of fake trailing chunk holding overhead space only once */
+  chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
+  m->trim_check = mparams.trim_threshold; /* reset on each update */
+}
+
+/* Initialize bins for a new mstate that is otherwise zeroed out */
+static void init_bins(mstate m) {
+  /* Establish circular links for smallbins */
+  bindex_t i;
+  for (i = 0; i < NSMALLBINS; ++i) {
+    sbinptr bin = smallbin_at(m,i);
+    bin->fd = bin->bk = bin;
+  }
+}
+
+#if PROCEED_ON_ERROR
+
+/* default corruption action */
+static void reset_on_error(mstate m) {
+  int i;
+  ++malloc_corruption_error_count;
+  /* Reinitialize fields to forget about all memory */
+  m->smallbins = m->treebins = 0;
+  m->dvsize = m->topsize = 0;
+  m->seg.base = 0;
+  m->seg.size = 0;
+  m->seg.next = 0;
+  m->top = m->dv = 0;
+  for (i = 0; i < NTREEBINS; ++i)
+    *treebin_at(m, i) = 0;
+  init_bins(m);
+}
+#endif /* PROCEED_ON_ERROR */
+
+/* Allocate chunk and prepend remainder with chunk in successor base. */
+static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
+                           size_t nb) {
+  mchunkptr p = align_as_chunk(newbase);
+  mchunkptr oldfirst = align_as_chunk(oldbase);
+  size_t psize = (char*)oldfirst - (char*)p;
+  mchunkptr q = chunk_plus_offset(p, nb);
+  size_t qsize = psize - nb;
+  set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+  assert((char*)oldfirst > (char*)q);
+  assert(pinuse(oldfirst));
+  assert(qsize >= MIN_CHUNK_SIZE);
+
+  /* consolidate remainder with first chunk of old base */
+  if (oldfirst == m->top) {
+    size_t tsize = m->topsize += qsize;
+    m->top = q;
+    q->head = tsize | PINUSE_BIT;
+    check_top_chunk(m, q);
+  }
+  else if (oldfirst == m->dv) {
+    size_t dsize = m->dvsize += qsize;
+    m->dv = q;
+    set_size_and_pinuse_of_free_chunk(q, dsize);
+  }
+  else {
+    if (!is_inuse(oldfirst)) {
+      size_t nsize = chunksize(oldfirst);
+      unlink_chunk(m, oldfirst, nsize);
+      oldfirst = chunk_plus_offset(oldfirst, nsize);
+      qsize += nsize;
+    }
+    set_free_with_pinuse(q, qsize, oldfirst);
+    insert_chunk(m, q, qsize);
+    check_free_chunk(m, q);
+  }
+
+  check_malloced_chunk(m, chunk2mem(p), nb);
+  return chunk2mem(p);
+}
+
+/* Add a segment to hold a new noncontiguous region */
+static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
+  /* Determine locations and sizes of segment, fenceposts, old top */
+  char* old_top = (char*)m->top;
+  msegmentptr oldsp = segment_holding(m, old_top);
+  char* old_end = oldsp->base + oldsp->size;
+  size_t ssize = pad_request(sizeof(struct malloc_segment));
+  char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  size_t offset = align_offset(chunk2mem(rawsp));
+  char* asp = rawsp + offset;
+  char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
+  mchunkptr sp = (mchunkptr)csp;
+  msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+  mchunkptr tnext = chunk_plus_offset(sp, ssize);
+  mchunkptr p = tnext;
+  int nfences = 0;
+
+  /* reset top to new space */
+  init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+  /* Set up segment record */
+  assert(is_aligned(ss));
+  set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+  *ss = m->seg; /* Push current record */
+  m->seg.base = tbase;
+  m->seg.size = tsize;
+  m->seg.sflags = mmapped;
+  m->seg.next = ss;
+
+  /* Insert trailing fenceposts */
+  for (;;) {
+    mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+    p->head = FENCEPOST_HEAD;
+    ++nfences;
+    if ((char*)(&(nextp->head)) < old_end)
+      p = nextp;
+    else
+      break;
+  }
+  assert(nfences >= 2);
+
+  /* Insert the rest of old top into a bin as an ordinary free chunk */
+  if (csp != old_top) {
+    mchunkptr q = (mchunkptr)old_top;
+    size_t psize = csp - old_top;
+    mchunkptr tn = chunk_plus_offset(q, psize);
+    set_free_with_pinuse(q, psize, tn);
+    insert_chunk(m, q, psize);
+  }
+
+  check_top_chunk(m, m->top);
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+/* Get memory from system using MORECORE or MMAP */
+static void* sys_alloc(mstate m, size_t nb) {
+  char* tbase = CMFAIL;
+  size_t tsize = 0;
+  flag_t mmap_flag = 0;
+
+  ensure_initialization();
+
+  /* Directly map large chunks, but only if already initialized */
+  if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
+    void* mem = mmap_alloc(m, nb);
+    if (mem != 0)
+      return mem;
+  }
+
+  /*
+    Try getting memory in any of three ways (in most-preferred to
+    least-preferred order):
+    1. A call to MORECORE that can normally contiguously extend memory.
+       (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
+       or main space is mmapped or a previous contiguous call failed)
+    2. A call to MMAP new space (disabled if not HAVE_MMAP).
+       Note that under the default settings, if MORECORE is unable to
+       fulfill a request, and HAVE_MMAP is true, then mmap is
+       used as a noncontiguous system allocator. This is a useful backup
+       strategy for systems with holes in address spaces -- in this case
+       sbrk cannot contiguously expand the heap, but mmap may be able to
+       find space.
+    3. A call to MORECORE that cannot usually contiguously extend memory.
+       (disabled if not HAVE_MORECORE)
+
+   In all cases, we need to request enough bytes from system to ensure
+   we can malloc nb bytes upon success, so pad with enough space for
+   top_foot, plus alignment-pad to make sure we don't lose bytes if
+   not on boundary, and round this up to a granularity unit.
+  */
+
+  if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
+    char* br = CMFAIL;
+    msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
+    size_t asize = 0;
+    ACQUIRE_MALLOC_GLOBAL_LOCK();
+
+    if (ss == 0) {  /* First time through or recovery */
+      char* base = (char*)CALL_MORECORE(0);
+      if (base != CMFAIL) {
+        asize = granularity_align(nb + SYS_ALLOC_PADDING);
+        /* Adjust to end on a page boundary */
+        if (!is_page_aligned(base))
+          asize += (page_align((size_t)base) - (size_t)base);
+        /* Can't call MORECORE if size is negative when treated as signed */
+        if (asize < HALF_MAX_SIZE_T &&
+            (br = (char*)(CALL_MORECORE(asize))) == base) {
+          tbase = base;
+          tsize = asize;
+        }
+      }
+    }
+    else {
+      /* Subtract out existing available top space from MORECORE request. */
+      asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
+      /* Use mem here only if it did continuously extend old space */
+      if (asize < HALF_MAX_SIZE_T &&
+          (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
+        tbase = br;
+        tsize = asize;
+      }
+    }
+
+    if (tbase == CMFAIL) {    /* Cope with partial failure */
+      if (br != CMFAIL) {    /* Try to use/extend the space we did get */
+        if (asize < HALF_MAX_SIZE_T &&
+            asize < nb + SYS_ALLOC_PADDING) {
+          size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize);
+          if (esize < HALF_MAX_SIZE_T) {
+            char* end = (char*)CALL_MORECORE(esize);
+            if (end != CMFAIL)
+              asize += esize;
+            else {            /* Can't use; try to release */
+              (void) CALL_MORECORE(-asize);
+              br = CMFAIL;
+            }
+          }
+        }
+      }
+      if (br != CMFAIL) {    /* Use the space we did get */
+        tbase = br;
+        tsize = asize;
+      }
+      else
+        disable_contiguous(m); /* Don't try contiguous path in the future */
+    }
+
+    RELEASE_MALLOC_GLOBAL_LOCK();
+  }
+
+  if (HAVE_MMAP && tbase == CMFAIL) {  /* Try MMAP */
+    size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING);
+    if (rsize > nb) { /* Fail if wraps around zero */
+      char* mp = (char*)(CALL_MMAP(rsize));
+      if (mp != CMFAIL) {
+        tbase = mp;
+        tsize = rsize;
+        mmap_flag = USE_MMAP_BIT;
+      }
+    }
+  }
+
+  if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
+    size_t asize = granularity_align(nb + SYS_ALLOC_PADDING);
+    if (asize < HALF_MAX_SIZE_T) {
+      char* br = CMFAIL;
+      char* end = CMFAIL;
+      ACQUIRE_MALLOC_GLOBAL_LOCK();
+      br = (char*)(CALL_MORECORE(asize));
+      end = (char*)(CALL_MORECORE(0));
+      RELEASE_MALLOC_GLOBAL_LOCK();
+      if (br != CMFAIL && end != CMFAIL && br < end) {
+        size_t ssize = end - br;
+        if (ssize > nb + TOP_FOOT_SIZE) {
+          tbase = br;
+          tsize = ssize;
+        }
+      }
+    }
+  }
+
+  if (tbase != CMFAIL) {
+
+    if ((m->footprint += tsize) > m->max_footprint)
+      m->max_footprint = m->footprint;
+
+    if (!is_initialized(m)) { /* first-time initialization */
+      if (m->least_addr == 0 || tbase < m->least_addr)
+        m->least_addr = tbase;
+      m->seg.base = tbase;
+      m->seg.size = tsize;
+      m->seg.sflags = mmap_flag;
+      m->magic = mparams.magic;
+      m->release_checks = MAX_RELEASE_CHECK_RATE;
+      init_bins(m);
+#if !ONLY_MSPACES
+      if (is_global(m))
+        init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+      else
+#endif
+      {
+        /* Offset top by embedded malloc_state */
+        mchunkptr mn = next_chunk(mem2chunk(m));
+        init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
+      }
+    }
+
+    else {
+      /* Try to merge with an existing segment */
+      msegmentptr sp = &m->seg;
+      /* Only consider most recent segment if traversal suppressed */
+      while (sp != 0 && tbase != sp->base + sp->size)
+        sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+      if (sp != 0 &&
+          !is_extern_segment(sp) &&
+          (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
+          segment_holds(sp, m->top)) { /* append */
+        sp->size += tsize;
+        init_top(m, m->top, m->topsize + tsize);
+      }
+      else {
+        if (tbase < m->least_addr)
+          m->least_addr = tbase;
+        sp = &m->seg;
+        while (sp != 0 && sp->base != tbase + tsize)
+          sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+        if (sp != 0 &&
+            !is_extern_segment(sp) &&
+            (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
+          char* oldbase = sp->base;
+          sp->base = tbase;
+          sp->size += tsize;
+          return prepend_alloc(m, tbase, oldbase, nb);
+        }
+        else
+          add_segment(m, tbase, tsize, mmap_flag);
+      }
+    }
+
+    if (nb < m->topsize) { /* Allocate from new or extended top space */
+      size_t rsize = m->topsize -= nb;
+      mchunkptr p = m->top;
+      mchunkptr r = m->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+      check_top_chunk(m, m->top);
+      check_malloced_chunk(m, chunk2mem(p), nb);
+      return chunk2mem(p);
+    }
+  }
+
+  MALLOC_FAILURE_ACTION;
+  return 0;
+}
+
+/* -----------------------  system deallocation -------------------------- */
+
+/* Unmap and unlink any mmapped segments that don't contain used chunks */
+static size_t release_unused_segments(mstate m) {
+  size_t released = 0;
+  int nsegs = 0;
+  msegmentptr pred = &m->seg;
+  msegmentptr sp = pred->next;
+  while (sp != 0) {
+    char* base = sp->base;
+    size_t size = sp->size;
+    msegmentptr next = sp->next;
+    ++nsegs;
+    if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
+      mchunkptr p = align_as_chunk(base);
+      size_t psize = chunksize(p);
+      /* Can unmap if first chunk holds entire segment and not pinned */
+      if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
+        tchunkptr tp = (tchunkptr)p;
+        assert(segment_holds(sp, (char*)sp));
+        if (p == m->dv) {
+          m->dv = 0;
+          m->dvsize = 0;
+        }
+        else {
+          unlink_large_chunk(m, tp);
+        }
+        if (CALL_MUNMAP(base, size) == 0) {
+          released += size;
+          m->footprint -= size;
+          /* unlink obsoleted record */
+          sp = pred;
+          sp->next = next;
+        }
+        else { /* back out if cannot unmap */
+          insert_large_chunk(m, tp, psize);
+        }
+      }
+    }
+    if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
+      break;
+    pred = sp;
+    sp = next;
+  }
+  /* Reset check counter */
+  m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)?
+                       nsegs : MAX_RELEASE_CHECK_RATE);
+  return released;
+}
+
+static int sys_trim(mstate m, size_t pad) {
+  size_t released = 0;
+  ensure_initialization();
+  if (pad < MAX_REQUEST && is_initialized(m)) {
+    pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
+
+    if (m->topsize > pad) {
+      /* Shrink top space in granularity-size units, keeping at least one */
+      size_t unit = mparams.granularity;
+      size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
+                      SIZE_T_ONE) * unit;
+      msegmentptr sp = segment_holding(m, (char*)m->top);
+
+      if (!is_extern_segment(sp)) {
+        if (is_mmapped_segment(sp)) {
+          if (HAVE_MMAP &&
+              sp->size >= extra &&
+              !has_segment_link(m, sp)) { /* can't shrink if pinned */
+            size_t newsize = sp->size - extra;
+            /* Prefer mremap, fall back to munmap */
+            if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
+                (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+              released = extra;
+            }
+          }
+        }
+        else if (HAVE_MORECORE) {
+          if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
+            extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
+          ACQUIRE_MALLOC_GLOBAL_LOCK();
+          {
+            /* Make sure end of memory is where we last set it. */
+            char* old_br = (char*)(CALL_MORECORE(0));
+            if (old_br == sp->base + sp->size) {
+              char* rel_br = (char*)(CALL_MORECORE(-extra));
+              char* new_br = (char*)(CALL_MORECORE(0));
+              if (rel_br != CMFAIL && new_br < old_br)
+                released = old_br - new_br;
+            }
+          }
+          RELEASE_MALLOC_GLOBAL_LOCK();
+        }
+      }
+
+      if (released != 0) {
+        sp->size -= released;
+        m->footprint -= released;
+        init_top(m, m->top, m->topsize - released);
+        check_top_chunk(m, m->top);
+      }
+    }
+
+    /* Unmap any unused mmapped segments */
+    if (HAVE_MMAP)
+      released += release_unused_segments(m);
+
+    /* On failure, disable autotrim to avoid repeated failed future calls */
+    if (released == 0 && m->topsize > m->trim_check)
+      m->trim_check = MAX_SIZE_T;
+  }
+
+  return (released != 0)? 1 : 0;
+}
+
+
+/* ---------------------------- malloc support --------------------------- */
+
+/* allocate a large request from the best fitting chunk in a treebin */
+static void* tmalloc_large(mstate m, size_t nb) {
+  tchunkptr v = 0;
+  size_t rsize = -nb; /* Unsigned negation */
+  tchunkptr t;
+  bindex_t idx;
+  compute_tree_index(nb, idx);
+  if ((t = *treebin_at(m, idx)) != 0) {
+    /* Traverse tree for this bin looking for node with size == nb */
+    size_t sizebits = nb << leftshift_for_tree_index(idx);
+    tchunkptr rst = 0;  /* The deepest untaken right subtree */
+    for (;;) {
+      tchunkptr rt;
+      size_t trem = chunksize(t) - nb;
+      if (trem < rsize) {
+        v = t;
+        if ((rsize = trem) == 0)
+          break;
+      }
+      rt = t->child[1];
+      t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+      if (rt != 0 && rt != t)
+        rst = rt;
+      if (t == 0) {
+        t = rst; /* set t to least subtree holding sizes > nb */
+        break;
+      }
+      sizebits <<= 1;
+    }
+  }
+  if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
+    binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+    if (leftbits != 0) {
+      bindex_t i;
+      binmap_t leastbit = least_bit(leftbits);
+      compute_bit2idx(leastbit, i);
+      t = *treebin_at(m, i);
+    }
+  }
+
+  while (t != 0) { /* find smallest of tree or subtree */
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+    t = leftmost_child(t);
+  }
+
+  /*  If dv is a better fit, return 0 so malloc will use it */
+  if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+    if (RTCHECK(ok_address(m, v))) { /* split */
+      mchunkptr r = chunk_plus_offset(v, nb);
+      assert(chunksize(v) == rsize + nb);
+      if (RTCHECK(ok_next(v, r))) {
+        unlink_large_chunk(m, v);
+        if (rsize < MIN_CHUNK_SIZE)
+          set_inuse_and_pinuse(m, v, (rsize + nb));
+        else {
+          set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+          set_size_and_pinuse_of_free_chunk(r, rsize);
+          insert_chunk(m, r, rsize);
+        }
+        return chunk2mem(v);
+      }
+    }
+    CORRUPTION_ERROR_ACTION(m);
+  }
+  return 0;
+}
+
+/* allocate a small request from the best fitting chunk in a treebin */
+static void* tmalloc_small(mstate m, size_t nb) {
+  tchunkptr t, v;
+  size_t rsize;
+  bindex_t i;
+  binmap_t leastbit = least_bit(m->treemap);
+  compute_bit2idx(leastbit, i);
+  v = t = *treebin_at(m, i);
+  rsize = chunksize(t) - nb;
+
+  while ((t = leftmost_child(t)) != 0) {
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+  }
+
+  if (RTCHECK(ok_address(m, v))) {
+    mchunkptr r = chunk_plus_offset(v, nb);
+    assert(chunksize(v) == rsize + nb);
+    if (RTCHECK(ok_next(v, r))) {
+      unlink_large_chunk(m, v);
+      if (rsize < MIN_CHUNK_SIZE)
+        set_inuse_and_pinuse(m, v, (rsize + nb));
+      else {
+        set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        replace_dv(m, r, rsize);
+      }
+      return chunk2mem(v);
+    }
+  }
+
+  CORRUPTION_ERROR_ACTION(m);
+  return 0;
+}
+
+/* --------------------------- realloc support --------------------------- */
+
+static void* internal_realloc(mstate m, void* oldmem, size_t bytes) {
+  if (bytes >= MAX_REQUEST) {
+    MALLOC_FAILURE_ACTION;
+    return 0;
+  }
+  if (!PREACTION(m)) {
+    mchunkptr oldp = mem2chunk(oldmem);
+    size_t oldsize = chunksize(oldp);
+    mchunkptr next = chunk_plus_offset(oldp, oldsize);
+    mchunkptr newp = 0;
+    void* extra = 0;
+
+    /* Try to either shrink or extend into top. Else malloc-copy-free */
+
+    if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) &&
+                ok_next(oldp, next) && ok_pinuse(next))) {
+      size_t nb = request2size(bytes);
+      if (is_mmapped(oldp))
+        newp = mmap_resize(m, oldp, nb);
+      else if (oldsize >= nb) { /* already big enough */
+        size_t rsize = oldsize - nb;
+        newp = oldp;
+        if (rsize >= MIN_CHUNK_SIZE) {
+          mchunkptr remainder = chunk_plus_offset(newp, nb);
+          set_inuse(m, newp, nb);
+          set_inuse_and_pinuse(m, remainder, rsize);
+          extra = chunk2mem(remainder);
+        }
+      }
+      else if (next == m->top && oldsize + m->topsize > nb) {
+        /* Expand into top */
+        size_t newsize = oldsize + m->topsize;
+        size_t newtopsize = newsize - nb;
+        mchunkptr newtop = chunk_plus_offset(oldp, nb);
+        set_inuse(m, oldp, nb);
+        newtop->head = newtopsize |PINUSE_BIT;
+        m->top = newtop;
+        m->topsize = newtopsize;
+        newp = oldp;
+      }
+    }
+    else {
+      USAGE_ERROR_ACTION(m, oldmem);
+      POSTACTION(m);
+      return 0;
+    }
+#if DEBUG
+    if (newp != 0) {
+      check_inuse_chunk(m, newp); /* Check requires lock */
+    }
+#endif
+
+    POSTACTION(m);
+
+    if (newp != 0) {
+      if (extra != 0) {
+        internal_free(m, extra);
+      }
+      return chunk2mem(newp);
+    }
+    else {
+      void* newmem = internal_malloc(m, bytes);
+      if (newmem != 0) {
+        size_t oc = oldsize - overhead_for(oldp);
+        memcpy(newmem, oldmem, (oc < bytes)? oc : bytes);
+        internal_free(m, oldmem);
+      }
+      return newmem;
+    }
+  }
+  return 0;
+}
+
+/* --------------------------- memalign support -------------------------- */
+
+static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
+  if (alignment <= MALLOC_ALIGNMENT)    /* Can just use malloc */
+    return internal_malloc(m, bytes);
+  if (alignment <  MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
+    alignment = MIN_CHUNK_SIZE;
+  if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
+    size_t a = MALLOC_ALIGNMENT << 1;
+    while (a < alignment) a <<= 1;
+    alignment = a;
+  }
+
+  if (bytes >= MAX_REQUEST - alignment) {
+    if (m != 0)  { /* Test isn't needed but avoids compiler warning */
+      MALLOC_FAILURE_ACTION;
+    }
+  }
+  else {
+    size_t nb = request2size(bytes);
+    size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
+    char* mem = (char*)internal_malloc(m, req);
+    if (mem != 0) {
+      void* leader = 0;
+      void* trailer = 0;
+      mchunkptr p = mem2chunk(mem);
+
+      if (PREACTION(m)) return 0;
+      if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */
+        /*
+          Find an aligned spot inside chunk.  Since we need to give
+          back leading space in a chunk of at least MIN_CHUNK_SIZE, if
+          the first calculation places us at a spot with less than
+          MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
+          We've allocated enough total room so that this is always
+          possible.
+        */
+        char* br = (char*)mem2chunk((size_t)(((size_t)(mem +
+                                                       alignment -
+                                                       SIZE_T_ONE)) &
+                                             -alignment));
+        char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
+          br : br+alignment;
+        mchunkptr newp = (mchunkptr)pos;
+        size_t leadsize = pos - (char*)(p);
+        size_t newsize = chunksize(p) - leadsize;
+
+        if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
+          newp->prev_foot = p->prev_foot + leadsize;
+          newp->head = newsize;
+        }
+        else { /* Otherwise, give back leader, use the rest */
+          set_inuse(m, newp, newsize);
+          set_inuse(m, p, leadsize);
+          leader = chunk2mem(p);
+        }
+        p = newp;
+      }
+
+      /* Give back spare room at the end */
+      if (!is_mmapped(p)) {
+        size_t size = chunksize(p);
+        if (size > nb + MIN_CHUNK_SIZE) {
+          size_t remainder_size = size - nb;
+          mchunkptr remainder = chunk_plus_offset(p, nb);
+          set_inuse(m, p, nb);
+          set_inuse(m, remainder, remainder_size);
+          trailer = chunk2mem(remainder);
+        }
+      }
+
+      assert (chunksize(p) >= nb);
+      assert((((size_t)(chunk2mem(p))) % alignment) == 0);
+      check_inuse_chunk(m, p);
+      POSTACTION(m);
+      if (leader != 0) {
+        internal_free(m, leader);
+      }
+      if (trailer != 0) {
+        internal_free(m, trailer);
+      }
+      return chunk2mem(p);
+    }
+  }
+  return 0;
+}
+
+/* ------------------------ comalloc/coalloc support --------------------- */
+
+static void** ialloc(mstate m,
+                     size_t n_elements,
+                     size_t* sizes,
+                     int opts,
+                     void* chunks[]) {
+  /*
+    This provides common support for independent_X routines, handling
+    all of the combinations that can result.
+
+    The opts arg has:
+    bit 0 set if all elements are same size (using sizes[0])
+    bit 1 set if elements should be zeroed
+  */
+
+  size_t    element_size;   /* chunksize of each element, if all same */
+  size_t    contents_size;  /* total size of elements */
+  size_t    array_size;     /* request size of pointer array */
+  void*     mem;            /* malloced aggregate space */
+  mchunkptr p;              /* corresponding chunk */
+  size_t    remainder_size; /* remaining bytes while splitting */
+  void**    marray;         /* either "chunks" or malloced ptr array */
+  mchunkptr array_chunk;    /* chunk for malloced ptr array */
+  flag_t    was_enabled;    /* to disable mmap */
+  size_t    size;
+  size_t    i;
+
+  ensure_initialization();
+  /* compute array length, if needed */
+  if (chunks != 0) {
+    if (n_elements == 0)
+      return chunks; /* nothing to do */
+    marray = chunks;
+    array_size = 0;
+  }
+  else {
+    /* if empty req, must still return chunk representing empty array */
+    if (n_elements == 0)
+      return (void**)internal_malloc(m, 0);
+    marray = 0;
+    array_size = request2size(n_elements * (sizeof(void*)));
+  }
+
+  /* compute total element size */
+  if (opts & 0x1) { /* all-same-size */
+    element_size = request2size(*sizes);
+    contents_size = n_elements * element_size;
+  }
+  else { /* add up all the sizes */
+    element_size = 0;
+    contents_size = 0;
+    for (i = 0; i != n_elements; ++i)
+      contents_size += request2size(sizes[i]);
+  }
+
+  size = contents_size + array_size;
+
+  /*
+     Allocate the aggregate chunk.  First disable direct-mmapping so
+     malloc won't use it, since we would not be able to later
+     free/realloc space internal to a segregated mmap region.
+  */
+  was_enabled = use_mmap(m);
+  disable_mmap(m);
+  mem = internal_malloc(m, size - CHUNK_OVERHEAD);
+  if (was_enabled)
+    enable_mmap(m);
+  if (mem == 0)
+    return 0;
+
+  if (PREACTION(m)) return 0;
+  p = mem2chunk(mem);
+  remainder_size = chunksize(p);
+
+  assert(!is_mmapped(p));
+
+  if (opts & 0x2) {       /* optionally clear the elements */
+    memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
+  }
+
+  /* If not provided, allocate the pointer array as final part of chunk */
+  if (marray == 0) {
+    size_t  array_chunk_size;
+    array_chunk = chunk_plus_offset(p, contents_size);
+    array_chunk_size = remainder_size - contents_size;
+    marray = (void**) (chunk2mem(array_chunk));
+    set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
+    remainder_size = contents_size;
+  }
+
+  /* split out elements */
+  for (i = 0; ; ++i) {
+    marray[i] = chunk2mem(p);
+    if (i != n_elements-1) {
+      if (element_size != 0)
+        size = element_size;
+      else
+        size = request2size(sizes[i]);
+      remainder_size -= size;
+      set_size_and_pinuse_of_inuse_chunk(m, p, size);
+      p = chunk_plus_offset(p, size);
+    }
+    else { /* the final element absorbs any overallocation slop */
+      set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
+      break;
+    }
+  }
+
+#if DEBUG
+  if (marray != chunks) {
+    /* final element must have exactly exhausted chunk */
+    if (element_size != 0) {
+      assert(remainder_size == element_size);
+    }
+    else {
+      assert(remainder_size == request2size(sizes[i]));
+    }
+    check_inuse_chunk(m, mem2chunk(marray));
+  }
+  for (i = 0; i != n_elements; ++i)
+    check_inuse_chunk(m, mem2chunk(marray[i]));
+
+#endif /* DEBUG */
+
+  POSTACTION(m);
+  return marray;
+}
+
+
+/* -------------------------- public routines ---------------------------- */
+
+#if !ONLY_MSPACES
+
+void* dlmalloc(size_t bytes) {
+  /*
+     Basic algorithm:
+     If a small request (< 256 bytes minus per-chunk overhead):
+       1. If one exists, use a remainderless chunk in associated smallbin.
+          (Remainderless means that there are too few excess bytes to
+          represent as a chunk.)
+       2. If it is big enough, use the dv chunk, which is normally the
+          chunk adjacent to the one used for the most recent small request.
+       3. If one exists, split the smallest available chunk in a bin,
+          saving remainder in dv.
+       4. If it is big enough, use the top chunk.
+       5. If available, get memory from system and use it
+     Otherwise, for a large request:
+       1. Find the smallest available binned chunk that fits, and use it
+          if it is better fitting than dv chunk, splitting if necessary.
+       2. If better fitting than any binned chunk, use the dv chunk.
+       3. If it is big enough, use the top chunk.
+       4. If request size >= mmap threshold, try to directly mmap this chunk.
+       5. If available, get memory from system and use it
+
+     The ugly goto's here ensure that postaction occurs along all paths.
+  */
+
+#if USE_LOCKS
+  ensure_initialization(); /* initialize in sys_alloc if not using locks */
+#endif
+
+  if (!PREACTION(gm)) {
+    void* mem;
+    size_t nb;
+    if (bytes <= MAX_SMALL_REQUEST) {
+      bindex_t idx;
+      binmap_t smallbits;
+      nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+      idx = small_index(nb);
+      smallbits = gm->smallmap >> idx;
+
+      if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+        mchunkptr b, p;
+        idx += ~smallbits & 1;       /* Uses next bin if idx empty */
+        b = smallbin_at(gm, idx);
+        p = b->fd;
+        assert(chunksize(p) == small_index2size(idx));
+        unlink_first_small_chunk(gm, b, p, idx);
+        set_inuse_and_pinuse(gm, p, small_index2size(idx));
+        mem = chunk2mem(p);
+        check_malloced_chunk(gm, mem, nb);
+        goto postaction;
+      }
+
+      else if (nb > gm->dvsize) {
+        if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+          mchunkptr b, p, r;
+          size_t rsize;
+          bindex_t i;
+          binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+          binmap_t leastbit = least_bit(leftbits);
+          compute_bit2idx(leastbit, i);
+          b = smallbin_at(gm, i);
+          p = b->fd;
+          assert(chunksize(p) == small_index2size(i));
+          unlink_first_small_chunk(gm, b, p, i);
+          rsize = small_index2size(i) - nb;
+          /* Fit here cannot be remainderless if 4byte sizes */
+          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+            set_inuse_and_pinuse(gm, p, small_index2size(i));
+          else {
+            set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+            r = chunk_plus_offset(p, nb);
+            set_size_and_pinuse_of_free_chunk(r, rsize);
+            replace_dv(gm, r, rsize);
+          }
+          mem = chunk2mem(p);
+          check_malloced_chunk(gm, mem, nb);
+          goto postaction;
+        }
+
+        else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
+          check_malloced_chunk(gm, mem, nb);
+          goto postaction;
+        }
+      }
+    }
+    else if (bytes >= MAX_REQUEST)
+      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+    else {
+      nb = pad_request(bytes);
+      if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
+        check_malloced_chunk(gm, mem, nb);
+        goto postaction;
+      }
+    }
+
+    if (nb <= gm->dvsize) {
+      size_t rsize = gm->dvsize - nb;
+      mchunkptr p = gm->dv;
+      if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+        mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
+        gm->dvsize = rsize;
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+      }
+      else { /* exhaust dv */
+        size_t dvs = gm->dvsize;
+        gm->dvsize = 0;
+        gm->dv = 0;
+        set_inuse_and_pinuse(gm, p, dvs);
+      }
+      mem = chunk2mem(p);
+      check_malloced_chunk(gm, mem, nb);
+      goto postaction;
+    }
+
+    else if (nb < gm->topsize) { /* Split top */
+      size_t rsize = gm->topsize -= nb;
+      mchunkptr p = gm->top;
+      mchunkptr r = gm->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+      mem = chunk2mem(p);
+      check_top_chunk(gm, gm->top);
+      check_malloced_chunk(gm, mem, nb);
+      goto postaction;
+    }
+
+    mem = sys_alloc(gm, nb);
+
+  postaction:
+    POSTACTION(gm);
+    return mem;
+  }
+
+  return 0;
+}
+
+void dlfree(void* mem) {
+  /*
+     Consolidate freed chunks with preceeding or succeeding bordering
+     free chunks, if they exist, and then place in a bin.  Intermixed
+     with special cases for top, dv, mmapped chunks, and usage errors.
+  */
+
+  if (mem != 0) {
+    mchunkptr p  = mem2chunk(mem);
+#if FOOTERS
+    mstate fm = get_mstate_for(p);
+    if (!ok_magic(fm)) {
+      USAGE_ERROR_ACTION(fm, p);
+      return;
+    }
+#else /* FOOTERS */
+#define fm gm
+#endif /* FOOTERS */
+    if (!PREACTION(fm)) {
+      check_inuse_chunk(fm, p);
+      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+        size_t psize = chunksize(p);
+        mchunkptr next = chunk_plus_offset(p, psize);
+        if (!pinuse(p)) {
+          size_t prevsize = p->prev_foot;
+          if (is_mmapped(p)) {
+            psize += prevsize + MMAP_FOOT_PAD;
+            if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+              fm->footprint -= psize;
+            goto postaction;
+          }
+          else {
+            mchunkptr prev = chunk_minus_offset(p, prevsize);
+            psize += prevsize;
+            p = prev;
+            if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+              if (p != fm->dv) {
+                unlink_chunk(fm, p, prevsize);
+              }
+              else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+                fm->dvsize = psize;
+                set_free_with_pinuse(p, psize, next);
+                goto postaction;
+              }
+            }
+            else
+              goto erroraction;
+          }
+        }
+
+        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+          if (!cinuse(next)) {  /* consolidate forward */
+            if (next == fm->top) {
+              size_t tsize = fm->topsize += psize;
+              fm->top = p;
+              p->head = tsize | PINUSE_BIT;
+              if (p == fm->dv) {
+                fm->dv = 0;
+                fm->dvsize = 0;
+              }
+              if (should_trim(fm, tsize))
+                sys_trim(fm, 0);
+              goto postaction;
+            }
+            else if (next == fm->dv) {
+              size_t dsize = fm->dvsize += psize;
+              fm->dv = p;
+              set_size_and_pinuse_of_free_chunk(p, dsize);
+              goto postaction;
+            }
+            else {
+              size_t nsize = chunksize(next);
+              psize += nsize;
+              unlink_chunk(fm, next, nsize);
+              set_size_and_pinuse_of_free_chunk(p, psize);
+              if (p == fm->dv) {
+                fm->dvsize = psize;
+                goto postaction;
+              }
+            }
+          }
+          else
+            set_free_with_pinuse(p, psize, next);
+
+          if (is_small(psize)) {
+            insert_small_chunk(fm, p, psize);
+            check_free_chunk(fm, p);
+          }
+          else {
+            tchunkptr tp = (tchunkptr)p;
+            insert_large_chunk(fm, tp, psize);
+            check_free_chunk(fm, p);
+            if (--fm->release_checks == 0)
+              release_unused_segments(fm);
+          }
+          goto postaction;
+        }
+      }
+    erroraction:
+      USAGE_ERROR_ACTION(fm, p);
+    postaction:
+      POSTACTION(fm);
+    }
+  }
+#if !FOOTERS
+#undef fm
+#endif /* FOOTERS */
+}
+
+void* dlcalloc(size_t n_elements, size_t elem_size) {
+  void* mem;
+  size_t req = 0;
+  if (n_elements != 0) {
+    req = n_elements * elem_size;
+    if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+        (req / n_elements != elem_size))
+      req = MAX_SIZE_T; /* force downstream failure on overflow */
+  }
+  mem = dlmalloc(req);
+  if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+    memset(mem, 0, req);
+  return mem;
+}
+
+void* dlrealloc(void* oldmem, size_t bytes) {
+  if (oldmem == 0)
+    return dlmalloc(bytes);
+#ifdef REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0) {
+    dlfree(oldmem);
+    return 0;
+  }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+  else {
+#if ! FOOTERS
+    mstate m = gm;
+#else /* FOOTERS */
+    mstate m = get_mstate_for(mem2chunk(oldmem));
+    if (!ok_magic(m)) {
+      USAGE_ERROR_ACTION(m, oldmem);
+      return 0;
+    }
+#endif /* FOOTERS */
+    return internal_realloc(m, oldmem, bytes);
+  }
+}
+
+void* dlmemalign(size_t alignment, size_t bytes) {
+  return internal_memalign(gm, alignment, bytes);
+}
+
+void** dlindependent_calloc(size_t n_elements, size_t elem_size,
+                                 void* chunks[]) {
+  size_t sz = elem_size; /* serves as 1-element array */
+  return ialloc(gm, n_elements, &sz, 3, chunks);
+}
+
+void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
+                                   void* chunks[]) {
+  return ialloc(gm, n_elements, sizes, 0, chunks);
+}
+
+void* dlvalloc(size_t bytes) {
+  size_t pagesz;
+  ensure_initialization();
+  pagesz = mparams.page_size;
+  return dlmemalign(pagesz, bytes);
+}
+
+void* dlpvalloc(size_t bytes) {
+  size_t pagesz;
+  ensure_initialization();
+  pagesz = mparams.page_size;
+  return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE));
+}
+
+int dlmalloc_trim(size_t pad) {
+  int result = 0;
+  ensure_initialization();
+  if (!PREACTION(gm)) {
+    result = sys_trim(gm, pad);
+    POSTACTION(gm);
+  }
+  return result;
+}
+
+size_t dlmalloc_footprint(void) {
+  return gm->footprint;
+}
+
+size_t dlmalloc_max_footprint(void) {
+  return gm->max_footprint;
+}
+
+#if !NO_MALLINFO
+struct mallinfo dlmallinfo(void) {
+  return internal_mallinfo(gm);
+}
+#endif /* NO_MALLINFO */
+
+void dlmalloc_stats() {
+  internal_malloc_stats(gm);
+}
+
+int dlmallopt(int param_number, int value) {
+  return change_mparam(param_number, value);
+}
+
+#endif /* !ONLY_MSPACES */
+
+size_t dlmalloc_usable_size(void* mem) {
+  if (mem != 0) {
+    mchunkptr p = mem2chunk(mem);
+    if (is_inuse(p))
+      return chunksize(p) - overhead_for(p);
+  }
+  return 0;
+}
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+#if MSPACES
+
+static mstate init_user_mstate(char* tbase, size_t tsize) {
+  size_t msize = pad_request(sizeof(struct malloc_state));
+  mchunkptr mn;
+  mchunkptr msp = align_as_chunk(tbase);
+  mstate m = (mstate)(chunk2mem(msp));
+  memset(m, 0, msize);
+  INITIAL_LOCK(&m->mutex);
+  msp->head = (msize|INUSE_BITS);
+  m->seg.base = m->least_addr = tbase;
+  m->seg.size = m->footprint = m->max_footprint = tsize;
+  m->magic = mparams.magic;
+  m->release_checks = MAX_RELEASE_CHECK_RATE;
+  m->mflags = mparams.default_mflags;
+  m->extp = 0;
+  m->exts = 0;
+  disable_contiguous(m);
+  init_bins(m);
+  mn = next_chunk(mem2chunk(m));
+  init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE);
+  check_top_chunk(m, m->top);
+  return m;
+}
+
+mspace create_mspace(size_t capacity, int locked) {
+  mstate m = 0;
+  size_t msize;
+  ensure_initialization();
+  msize = pad_request(sizeof(struct malloc_state));
+  if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
+    size_t rs = ((capacity == 0)? mparams.granularity :
+                 (capacity + TOP_FOOT_SIZE + msize));
+    size_t tsize = granularity_align(rs);
+    char* tbase = (char*)(CALL_MMAP(tsize));
+    if (tbase != CMFAIL) {
+      m = init_user_mstate(tbase, tsize);
+      m->seg.sflags = USE_MMAP_BIT;
+      set_lock(m, locked);
+    }
+  }
+  return (mspace)m;
+}
+
+mspace create_mspace_with_base(void* base, size_t capacity, int locked) {
+  mstate m = 0;
+  size_t msize;
+  ensure_initialization();
+  msize = pad_request(sizeof(struct malloc_state));
+  if (capacity > msize + TOP_FOOT_SIZE &&
+      capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) {
+    m = init_user_mstate((char*)base, capacity);
+    m->seg.sflags = EXTERN_BIT;
+    set_lock(m, locked);
+  }
+  return (mspace)m;
+}
+
+int mspace_track_large_chunks(mspace msp, int enable) {
+  int ret = 0;
+  mstate ms = (mstate)msp;
+  if (!PREACTION(ms)) {
+    if (!use_mmap(ms))
+      ret = 1;
+    if (!enable)
+      enable_mmap(ms);
+    else
+      disable_mmap(ms);
+    POSTACTION(ms);
+  }
+  return ret;
+}
+
+size_t destroy_mspace(mspace msp) {
+  size_t freed = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    msegmentptr sp = &ms->seg;
+    while (sp != 0) {
+      char* base = sp->base;
+      size_t size = sp->size;
+      flag_t flag = sp->sflags;
+      sp = sp->next;
+      if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) &&
+          CALL_MUNMAP(base, size) == 0)
+        freed += size;
+    }
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return freed;
+}
+
+/*
+  mspace versions of routines are near-clones of the global
+  versions. This is not so nice but better than the alternatives.
+*/
+
+
+void* mspace_malloc(mspace msp, size_t bytes) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  if (!PREACTION(ms)) {
+    void* mem;
+    size_t nb;
+    if (bytes <= MAX_SMALL_REQUEST) {
+      bindex_t idx;
+      binmap_t smallbits;
+      nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
+      idx = small_index(nb);
+      smallbits = ms->smallmap >> idx;
+
+      if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+        mchunkptr b, p;
+        idx += ~smallbits & 1;       /* Uses next bin if idx empty */
+        b = smallbin_at(ms, idx);
+        p = b->fd;
+        assert(chunksize(p) == small_index2size(idx));
+        unlink_first_small_chunk(ms, b, p, idx);
+        set_inuse_and_pinuse(ms, p, small_index2size(idx));
+        mem = chunk2mem(p);
+        check_malloced_chunk(ms, mem, nb);
+        goto postaction;
+      }
+
+      else if (nb > ms->dvsize) {
+        if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+          mchunkptr b, p, r;
+          size_t rsize;
+          bindex_t i;
+          binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+          binmap_t leastbit = least_bit(leftbits);
+          compute_bit2idx(leastbit, i);
+          b = smallbin_at(ms, i);
+          p = b->fd;
+          assert(chunksize(p) == small_index2size(i));
+          unlink_first_small_chunk(ms, b, p, i);
+          rsize = small_index2size(i) - nb;
+          /* Fit here cannot be remainderless if 4byte sizes */
+          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+            set_inuse_and_pinuse(ms, p, small_index2size(i));
+          else {
+            set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+            r = chunk_plus_offset(p, nb);
+            set_size_and_pinuse_of_free_chunk(r, rsize);
+            replace_dv(ms, r, rsize);
+          }
+          mem = chunk2mem(p);
+          check_malloced_chunk(ms, mem, nb);
+          goto postaction;
+        }
+
+        else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+          check_malloced_chunk(ms, mem, nb);
+          goto postaction;
+        }
+      }
+    }
+    else if (bytes >= MAX_REQUEST)
+      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+    else {
+      nb = pad_request(bytes);
+      if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+        check_malloced_chunk(ms, mem, nb);
+        goto postaction;
+      }
+    }
+
+    if (nb <= ms->dvsize) {
+      size_t rsize = ms->dvsize - nb;
+      mchunkptr p = ms->dv;
+      if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+        mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+        ms->dvsize = rsize;
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+      }
+      else { /* exhaust dv */
+        size_t dvs = ms->dvsize;
+        ms->dvsize = 0;
+        ms->dv = 0;
+        set_inuse_and_pinuse(ms, p, dvs);
+      }
+      mem = chunk2mem(p);
+      check_malloced_chunk(ms, mem, nb);
+      goto postaction;
+    }
+
+    else if (nb < ms->topsize) { /* Split top */
+      size_t rsize = ms->topsize -= nb;
+      mchunkptr p = ms->top;
+      mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+      mem = chunk2mem(p);
+      check_top_chunk(ms, ms->top);
+      check_malloced_chunk(ms, mem, nb);
+      goto postaction;
+    }
+
+    mem = sys_alloc(ms, nb);
+
+  postaction:
+    POSTACTION(ms);
+    return mem;
+  }
+
+  return 0;
+}
+
+void mspace_free(mspace msp, void* mem) {
+  if (mem != 0) {
+    mchunkptr p  = mem2chunk(mem);
+#if FOOTERS
+    mstate fm = get_mstate_for(p);
+    msp = msp; /* placate people compiling -Wunused */
+#else /* FOOTERS */
+    mstate fm = (mstate)msp;
+#endif /* FOOTERS */
+    if (!ok_magic(fm)) {
+      USAGE_ERROR_ACTION(fm, p);
+      return;
+    }
+    if (!PREACTION(fm)) {
+      check_inuse_chunk(fm, p);
+      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+        size_t psize = chunksize(p);
+        mchunkptr next = chunk_plus_offset(p, psize);
+        if (!pinuse(p)) {
+          size_t prevsize = p->prev_foot;
+          if (is_mmapped(p)) {
+            psize += prevsize + MMAP_FOOT_PAD;
+            if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
+              fm->footprint -= psize;
+            goto postaction;
+          }
+          else {
+            mchunkptr prev = chunk_minus_offset(p, prevsize);
+            psize += prevsize;
+            p = prev;
+            if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
+              if (p != fm->dv) {
+                unlink_chunk(fm, p, prevsize);
+              }
+              else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+                fm->dvsize = psize;
+                set_free_with_pinuse(p, psize, next);
+                goto postaction;
+              }
+            }
+            else
+              goto erroraction;
+          }
+        }
+
+        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+          if (!cinuse(next)) {  /* consolidate forward */
+            if (next == fm->top) {
+              size_t tsize = fm->topsize += psize;
+              fm->top = p;
+              p->head = tsize | PINUSE_BIT;
+              if (p == fm->dv) {
+                fm->dv = 0;
+                fm->dvsize = 0;
+              }
+              if (should_trim(fm, tsize))
+                sys_trim(fm, 0);
+              goto postaction;
+            }
+            else if (next == fm->dv) {
+              size_t dsize = fm->dvsize += psize;
+              fm->dv = p;
+              set_size_and_pinuse_of_free_chunk(p, dsize);
+              goto postaction;
+            }
+            else {
+              size_t nsize = chunksize(next);
+              psize += nsize;
+              unlink_chunk(fm, next, nsize);
+              set_size_and_pinuse_of_free_chunk(p, psize);
+              if (p == fm->dv) {
+                fm->dvsize = psize;
+                goto postaction;
+              }
+            }
+          }
+          else
+            set_free_with_pinuse(p, psize, next);
+
+          if (is_small(psize)) {
+            insert_small_chunk(fm, p, psize);
+            check_free_chunk(fm, p);
+          }
+          else {
+            tchunkptr tp = (tchunkptr)p;
+            insert_large_chunk(fm, tp, psize);
+            check_free_chunk(fm, p);
+            if (--fm->release_checks == 0)
+              release_unused_segments(fm);
+          }
+          goto postaction;
+        }
+      }
+    erroraction:
+      USAGE_ERROR_ACTION(fm, p);
+    postaction:
+      POSTACTION(fm);
+    }
+  }
+}
+
+void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
+  void* mem;
+  size_t req = 0;
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  if (n_elements != 0) {
+    req = n_elements * elem_size;
+    if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+        (req / n_elements != elem_size))
+      req = MAX_SIZE_T; /* force downstream failure on overflow */
+  }
+  mem = internal_malloc(ms, req);
+  if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+    memset(mem, 0, req);
+  return mem;
+}
+
+void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
+  if (oldmem == 0)
+    return mspace_malloc(msp, bytes);
+#ifdef REALLOC_ZERO_BYTES_FREES
+  if (bytes == 0) {
+    mspace_free(msp, oldmem);
+    return 0;
+  }
+#endif /* REALLOC_ZERO_BYTES_FREES */
+  else {
+#if FOOTERS
+    mchunkptr p  = mem2chunk(oldmem);
+    mstate ms = get_mstate_for(p);
+#else /* FOOTERS */
+    mstate ms = (mstate)msp;
+#endif /* FOOTERS */
+    if (!ok_magic(ms)) {
+      USAGE_ERROR_ACTION(ms,ms);
+      return 0;
+    }
+    return internal_realloc(ms, oldmem, bytes);
+  }
+}
+
+void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  return internal_memalign(ms, alignment, bytes);
+}
+
+void** mspace_independent_calloc(mspace msp, size_t n_elements,
+                                 size_t elem_size, void* chunks[]) {
+  size_t sz = elem_size; /* serves as 1-element array */
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  return ialloc(ms, n_elements, &sz, 3, chunks);
+}
+
+void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+                                   size_t sizes[], void* chunks[]) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+    return 0;
+  }
+  return ialloc(ms, n_elements, sizes, 0, chunks);
+}
+
+int mspace_trim(mspace msp, size_t pad) {
+  int result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    if (!PREACTION(ms)) {
+      result = sys_trim(ms, pad);
+      POSTACTION(ms);
+    }
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return result;
+}
+
+void mspace_malloc_stats(mspace msp) {
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    internal_malloc_stats(ms);
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+}
+
+size_t mspace_footprint(mspace msp) {
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    result = ms->footprint;
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return result;
+}
+
+
+size_t mspace_max_footprint(mspace msp) {
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+    result = ms->max_footprint;
+  }
+  else {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return result;
+}
+
+
+#if !NO_MALLINFO
+struct mallinfo mspace_mallinfo(mspace msp) {
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+    USAGE_ERROR_ACTION(ms,ms);
+  }
+  return internal_mallinfo(ms);
+}
+#endif /* NO_MALLINFO */
+
+size_t mspace_usable_size(void* mem) {
+  if (mem != 0) {
+    mchunkptr p = mem2chunk(mem);
+    if (is_inuse(p))
+      return chunksize(p) - overhead_for(p);
+  }
+  return 0;
+}
+
+int mspace_mallopt(int param_number, int value) {
+  return change_mparam(param_number, value);
+}
+
+#endif /* MSPACES */
+
+
+/* -------------------- Alternative MORECORE functions ------------------- */
+
+/*
+  Guidelines for creating a custom version of MORECORE:
+
+  * For best performance, MORECORE should allocate in multiples of pagesize.
+  * MORECORE may allocate more memory than requested. (Or even less,
+      but this will usually result in a malloc failure.)
+  * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call.
+  * For best performance, consecutive calls to MORECORE with positive
+      arguments should return increasing addresses, indicating that
+      space has been contiguously extended.
+  * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+  * MORECORE need not handle negative arguments -- it may instead
+      just return MFAIL when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress all such calls from even occurring by defining
+      MORECORE_CANNOT_TRIM,
+
+  As an example alternative MORECORE, here is a custom allocator
+  kindly contributed for pre-OSX macOS.  It uses virtually but not
+  necessarily physically contiguous non-paged memory (locked in,
+  present and won't get swapped out).  You can use it by uncommenting
+  this section, adding some #includes, and setting up the appropriate
+  defines above:
+
+      #define MORECORE osMoreCore
+
+  There is also a shutdown routine that should somehow be called for
+  cleanup upon program exit.
+
+  #define MAX_POOL_ENTRIES 100
+  #define MINIMUM_MORECORE_SIZE  (64 * 1024U)
+  static int next_os_pool;
+  void *our_os_pools[MAX_POOL_ENTRIES];
+
+  void *osMoreCore(int size)
+  {
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+        return (void *) MFAIL;
+      }
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+    }
+    else if (size < 0)
+    {
+      // we don't currently support shrink behavior
+      return (void *) MFAIL;
+    }
+    else
+    {
+      return sbrk_top;
+    }
+  }
+
+  // cleanup any allocated memory pools
+  // called as last thing before shutting down driver
+
+  void osCleanupMem(void)
+  {
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+      }
+  }
+
+*/
+
+
+/* -----------------------------------------------------------------------
+History:
+    V2.8.4 Wed May 27 09:56:23 2009  Doug Lea  (dl at gee)
+      * Use zeros instead of prev foot for is_mmapped
+      * Add mspace_track_large_chunks; thanks to Jean Brouwers
+      * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
+      * Fix insufficient sys_alloc padding when using 16byte alignment
+      * Fix bad error check in mspace_footprint
+      * Adaptations for ptmalloc; thanks to Wolfram Gloger.
+      * Reentrant spin locks; thanks to Earl Chew and others
+      * Win32 improvements; thanks to Niall Douglas and Earl Chew
+      * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
+      * Extension hook in malloc_state
+      * Various small adjustments to reduce warnings on some compilers
+      * Various configuration extensions/changes for more platforms. Thanks
+         to all who contributed these.
+
+    V2.8.3 Thu Sep 22 11:16:32 2005  Doug Lea  (dl at gee)
+      * Add max_footprint functions
+      * Ensure all appropriate literals are size_t
+      * Fix conditional compilation problem for some #define settings
+      * Avoid concatenating segments with the one provided
+        in create_mspace_with_base
+      * Rename some variables to avoid compiler shadowing warnings
+      * Use explicit lock initialization.
+      * Better handling of sbrk interference.
+      * Simplify and fix segment insertion, trimming and mspace_destroy
+      * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
+      * Thanks especially to Dennis Flanagan for help on these.
+
+    V2.8.2 Sun Jun 12 16:01:10 2005  Doug Lea  (dl at gee)
+      * Fix memalign brace error.
+
+    V2.8.1 Wed Jun  8 16:11:46 2005  Doug Lea  (dl at gee)
+      * Fix improper #endif nesting in C++
+      * Add explicit casts needed for C++
+
+    V2.8.0 Mon May 30 14:09:02 2005  Doug Lea  (dl at gee)
+      * Use trees for large bins
+      * Support mspaces
+      * Use segments to unify sbrk-based and mmap-based system allocation,
+        removing need for emulation on most platforms without sbrk.
+      * Default safety checks
+      * Optional footer checks. Thanks to William Robertson for the idea.
+      * Internal code refactoring
+      * Incorporate suggestions and platform-specific changes.
+        Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
+        Aaron Bachmann,  Emery Berger, and others.
+      * Speed up non-fastbin processing enough to remove fastbins.
+      * Remove useless cfree() to avoid conflicts with other apps.
+      * Remove internal memcpy, memset. Compilers handle builtins better.
+      * Remove some options that no one ever used and rename others.
+
+    V2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+      * Fix malloc_state bitmap array misdeclaration
+
+    V2.7.1 Thu Jul 25 10:58:03 2002  Doug Lea  (dl at gee)
+      * Allow tuning of FIRST_SORTED_BIN_SIZE
+      * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+      * Better detection and support for non-contiguousness of MORECORE.
+        Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+      * Bypass most of malloc if no frees. Thanks To Emery Berger.
+      * Fix freeing of old top non-contiguous chunk im sysmalloc.
+      * Raised default trim and map thresholds to 256K.
+      * Fix mmap-related #defines. Thanks to Lubos Lunak.
+      * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+      * Branch-free bin calculation
+      * Default trim and mmap thresholds now 256K.
+
+    V2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+      * Introduce independent_comalloc and independent_calloc.
+        Thanks to Michael Pachos for motivation and help.
+      * Make optional .h file available
+      * Allow > 2GB requests on 32bit systems.
+      * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
+        Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
+        and Anonymous.
+      * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+        helping test this.)
+      * memalign: check alignment arg
+      * realloc: don't try to shift chunks backwards, since this
+        leads to  more fragmentation in some programs and doesn't
+        seem to help in any others.
+      * Collect all cases in malloc requiring system memory into sysmalloc
+      * Use mmap as backup to sbrk
+      * Place all internal state in malloc_state
+      * Introduce fastbins (although similar to 2.5.1)
+      * Many minor tunings and cosmetic improvements
+      * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+      * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+        Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
+      * Include errno.h to support default failure action.
+
+    V2.6.6 Sun Dec  5 07:42:19 1999  Doug Lea  (dl at gee)
+      * return null for negative arguments
+      * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
+         * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+          (e.g. WIN32 platforms)
+         * Cleanup header file inclusion for WIN32 platforms
+         * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+         * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+           memory allocation routines
+         * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+         * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+           usage of 'assert' in non-WIN32 code
+         * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+           avoid infinite loop
+      * Always call 'fREe()' rather than 'free()'
+
+    V2.6.5 Wed Jun 17 15:57:31 1998  Doug Lea  (dl at gee)
+      * Fixed ordering problem with boundary-stamping
+
+    V2.6.3 Sun May 19 08:17:58 1996  Doug Lea  (dl at gee)
+      * Added pvalloc, as recommended by H.J. Liu
+      * Added 64bit pointer support mainly from Wolfram Gloger
+      * Added anonymously donated WIN32 sbrk emulation
+      * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+      * malloc_extend_top: fix mask error that caused wastage after
+        foreign sbrks
+      * Add linux mremap support code from HJ Liu
+
+    V2.6.2 Tue Dec  5 06:52:55 1995  Doug Lea  (dl at gee)
+      * Integrated most documentation with the code.
+      * Add support for mmap, with help from
+        Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Use last_remainder in more cases.
+      * Pack bins using idea from  colin@nyx10.cs.du.edu
+      * Use ordered bins instead of best-fit threshhold
+      * Eliminate block-local decls to simplify tracing and debugging.
+      * Support another case of realloc via move into top
+      * Fix error occuring when initial sbrk_base not word-aligned.
+      * Rely on page size for units instead of SBRK_UNIT to
+        avoid surprises about sbrk alignment conventions.
+      * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+        (raymond@es.ele.tue.nl) for the suggestion.
+      * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+      * More precautions for cases where other routines call sbrk,
+        courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Added macros etc., allowing use in linux libc from
+        H.J. Lu (hjl@gnu.ai.mit.edu)
+      * Inverted this history list
+
+    V2.6.1 Sat Dec  2 14:10:57 1995  Doug Lea  (dl at gee)
+      * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+      * Removed all preallocation code since under current scheme
+        the work required to undo bad preallocations exceeds
+        the work saved in good cases for most test programs.
+      * No longer use return list or unconsolidated bins since
+        no scheme using them consistently outperforms those that don't
+        given above changes.
+      * Use best fit for very large chunks to prevent some worst-cases.
+      * Added some support for debugging
+
+    V2.6.0 Sat Nov  4 07:05:23 1995  Doug Lea  (dl at gee)
+      * Removed footers when chunks are in use. Thanks to
+        Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+    V2.5.4 Wed Nov  1 07:54:51 1995  Doug Lea  (dl at gee)
+      * Added malloc_trim, with help from Wolfram Gloger
+        (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+    V2.5.3 Tue Apr 26 10:16:01 1994  Doug Lea  (dl at g)
+
+    V2.5.2 Tue Apr  5 16:20:40 1994  Doug Lea  (dl at g)
+      * realloc: try to expand in both directions
+      * malloc: swap order of clean-bin strategy;
+      * realloc: only conditionally expand backwards
+      * Try not to scavenge used bins
+      * Use bin counts as a guide to preallocation
+      * Occasionally bin return list chunks in first scan
+      * Add a few optimizations from colin@nyx10.cs.du.edu
+
+    V2.5.1 Sat Aug 14 15:40:43 1993  Doug Lea  (dl at g)
+      * faster bin computation & slightly different binning
+      * merged all consolidations to one part of malloc proper
+         (eliminating old malloc_find_space & malloc_clean_bin)
+      * Scan 2 returns chunks (not just 1)
+      * Propagate failure in realloc if malloc returns 0
+      * Add stuff to allow compilation on non-ANSI compilers
+          from kpv@research.att.com
+
+    V2.5 Sat Aug  7 07:41:59 1993  Doug Lea  (dl at g.oswego.edu)
+      * removed potential for odd address access in prev_chunk
+      * removed dependency on getpagesize.h
+      * misc cosmetics and a bit more internal documentation
+      * anticosmetics: mangled names in macros to evade debugger strangeness
+      * tested on sparc, hp-700, dec-mips, rs6000
+          with gcc & native cc (hp, dec only) allowing
+          Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+
+    Trial version Fri Aug 28 13:14:29 1992  Doug Lea  (dl at g.oswego.edu)
+      * Based loosely on libg++-1.2X malloc. (It retains some of the overall
+         structure of old version,  but most details differ.)
+
+*/
+
+#endif
diff --git a/drivers/nedmalloc/nedmalloc.cpp b/drivers/nedmalloc/nedmalloc.cpp
index 8845d96549..9aac277a2a 100644
--- a/drivers/nedmalloc/nedmalloc.cpp
+++ b/drivers/nedmalloc/nedmalloc.cpp
@@ -1,1467 +1,1467 @@
-#ifdef NEDMALLOC_ENABLED
-/* Alternative malloc implementation for multiple threads without
-lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-#ifdef _MSC_VER
-/* Enable full aliasing on MSVC */
-/*#pragma optimize("a", on)*/
-#pragma warning(push)
-#pragma warning(disable:4100)	/* unreferenced formal parameter */
-#pragma warning(disable:4127)	/* conditional expression is constant */
-#pragma warning(disable:4706)	/* assignment within conditional expression */
-#endif
-
-/*#define ENABLE_TOLERANT_NEDMALLOC 1*/
-/*#define ENABLE_FAST_HEAP_DETECTION 1*/
-/*#define NEDMALLOC_DEBUG 1*/
-
-/*#define FULLSANITYCHECKS*/
-/* If link time code generation is on, don't force or prevent inlining */
-#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS)
-#define FORCEINLINE
-#define NOINLINE
-#endif
-
-
-#include "nedmalloc.h"
-#ifdef WIN32
- #include <malloc.h>
- #include <stddef.h>
-#endif
-#if USE_ALLOCATOR==1
- #define MSPACES 1
- #define ONLY_MSPACES 1
-#endif
-#define USE_DL_PREFIX 1
-#ifndef USE_LOCKS
- #define USE_LOCKS 1
-#endif
-#define FOOTERS 1           /* Need to enable footers so frees lock the right mspace */
-#ifndef NEDMALLOC_DEBUG
- #if defined(DEBUG) || defined(_DEBUG)
-  #define NEDMALLOC_DEBUG 1
- #else
-  #define NEDMALLOC_DEBUG 0
- #endif
-#endif
-/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */
-#undef DEBUG
-#undef _DEBUG
-#if NEDMALLOC_DEBUG
- #define _DEBUG
- #define DEBUG 1
-#else
- #define DEBUG 0
-#endif
-#ifdef NDEBUG               /* Disable assert checking on release builds */
- #undef DEBUG
- #undef _DEBUG
-#endif
-/* The default of 64Kb means we spend too much time kernel-side */
-#ifndef DEFAULT_GRANULARITY
-#define DEFAULT_GRANULARITY (1*1024*1024)
-#if DEBUG
-#define DEFAULT_GRANULARITY_ALIGNED
-#endif
-#endif
-/*#define USE_SPIN_LOCKS 0*/
-
-
-#include "malloc.c.h"
-#ifdef NDEBUG               /* Disable assert checking on release builds */
- #undef DEBUG
-#elif !NEDMALLOC_DEBUG
- #ifdef __GNUC__
-  #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.
- #elif defined(_MSC_VER)
-  #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.")
- #endif
-#endif
-
-/* The maximum concurrent threads in a pool possible */
-#ifndef MAXTHREADSINPOOL
-#define MAXTHREADSINPOOL 16
-#endif
-/* The maximum number of threadcaches which can be allocated */
-#ifndef THREADCACHEMAXCACHES
-#define THREADCACHEMAXCACHES 256
-#endif
-/* The maximum size to be allocated from the thread cache */
-#ifndef THREADCACHEMAX
-#define THREADCACHEMAX 8192
-#endif
-#if 0
-/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
-#define THREADCACHEMAXBINS ((13-4)*2)
-#else
-/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
-#define THREADCACHEMAXBINS (13-4)
-#endif
-/* Point at which the free space in a thread cache is garbage collected */
-#ifndef THREADCACHEMAXFREESPACE
-#define THREADCACHEMAXFREESPACE (512*1024)
-#endif
-
-
-#ifdef WIN32
- #define TLSVAR			DWORD
- #define TLSALLOC(k)	(*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
- #define TLSFREE(k)		(!TlsFree(k))
- #define TLSGET(k)		TlsGetValue(k)
- #define TLSSET(k, a)	(!TlsSetValue(k, a))
- #ifdef DEBUG
-static LPVOID ChkedTlsGetValue(DWORD idx)
-{
-	LPVOID ret=TlsGetValue(idx);
-	assert(S_OK==GetLastError());
-	return ret;
-}
-  #undef TLSGET
-  #define TLSGET(k) ChkedTlsGetValue(k)
- #endif
-#else
- #define TLSVAR			pthread_key_t
- #define TLSALLOC(k)	pthread_key_create(k, 0)
- #define TLSFREE(k)		pthread_key_delete(k)
- #define TLSGET(k)		pthread_getspecific(k)
- #define TLSSET(k, a)	pthread_setspecific(k, a)
-#endif
-
-#if defined(__cplusplus)
-#if !defined(NO_NED_NAMESPACE)
-namespace nedalloc {
-#else
-extern "C" {
-#endif
-#endif
-
-#if USE_ALLOCATOR==0
-static void *unsupported_operation(const char *opname) THROWSPEC
-{
-	fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname);
-	abort();
-	return 0;
-}
-static size_t mspacecounter=(size_t) 0xdeadbeef;
-#endif
-#ifndef ENABLE_FAST_HEAP_DETECTION
-static void *RESTRICT leastusedaddress;
-static size_t largestusedblock;
-#endif
-
-static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
-	void *RESTRICT ret=0;
-	size_t _alignment=alignment;
-#if USE_MAGIC_HEADERS
-	size_t *_ret=0;
-	size+=alignment+3*sizeof(size_t);
-	_alignment=0;
-#endif
-#if USE_ALLOCATOR==0
-	ret=_alignment ? 
-#ifdef _MSC_VER
-	/* This is the MSVCRT equivalent */
-		_aligned_malloc(size, _alignment)
-#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
-	/* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent.  */
-		memalign(_alignment, size)
-#else
-#error Cannot aligned allocate with the memory allocator of an unknown system!
-#endif
-		: malloc(size);
-#elif USE_ALLOCATOR==1
-	ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
-	if(ret)
-	{
-		size_t truesize=chunksize(mem2chunk(ret));
-		if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
-		if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
-	}
-#endif
-#endif
-	if(!ret) return 0;
-#if USE_MAGIC_HEADERS
-	_ret=(size_t *) ret;
-	ret=(void *)(_ret+3);
-	if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
-	for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC";
-	_ret[0]=(size_t) mspace;
-	_ret[1]=size-3*sizeof(size_t);
-#endif
-	return ret;
-}
-
-static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
-	void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
-	size_t *_ret=0;
-	size+=alignment+3*sizeof(size_t);
-#endif
-#if USE_ALLOCATOR==0
-	ret=calloc(1, size);
-#elif USE_ALLOCATOR==1
-	ret=mspace_calloc((mstate) mspace, 1, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
-	if(ret)
-	{
-		size_t truesize=chunksize(mem2chunk(ret));
-		if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
-		if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
-	}
-#endif
-#endif
-	if(!ret) return 0;
-#if USE_MAGIC_HEADERS
-	_ret=(size_t *) ret;
-	ret=(void *)(_ret+3);
-	if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
-	for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
-	_ret[0]=(size_t) mspace;
-	_ret[1]=size-3*sizeof(size_t);
-#endif
-	return ret;
-}
-
-static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC
-{
-	void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
-	mstate oldmspace=0;
-	size_t *_ret=0, *_mem=(size_t *) mem-3;
-#endif
-	if(isforeign)
-	{	/* Transfer */
-#if USE_MAGIC_HEADERS
-		assert(_mem[0]!=*(size_t *) "NEDMALOC");
-#endif
-		if((ret=CallMalloc(mspace, newsize, 0)))
-		{
-#if defined(DEBUG)
-			printf("*** nedmalloc frees system allocated block %p\n", mem);
-#endif
-			memcpy(ret, mem, oldsize<newsize ? oldsize : newsize);
-			free(mem);
-		}
-		return ret;
-	}
-#if USE_MAGIC_HEADERS
-	assert(_mem[0]==*(size_t *) "NEDMALOC");
-	newsize+=3*sizeof(size_t);
-	oldmspace=(mstate) _mem[1];
-	assert(oldsize>=_mem[2]);
-	for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
-	mem=(void *)(++_mem);
-#endif
-#if USE_ALLOCATOR==0
-	ret=realloc(mem, newsize);
-#elif USE_ALLOCATOR==1
-	ret=mspace_realloc((mstate) mspace, mem, newsize);
-#ifndef ENABLE_FAST_HEAP_DETECTION
-	if(ret)
-	{
-		size_t truesize=chunksize(mem2chunk(ret));
-		if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
-	}
-#endif
-#endif
-	if(!ret)
-	{	/* Put it back the way it was */
-#if USE_MAGIC_HEADERS
-		for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC");
-#endif
-		return 0;
-	}
-#if USE_MAGIC_HEADERS
-	_ret=(size_t *) ret;
-	ret=(void *)(_ret+3);
-	for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
-	_ret[0]=(size_t) mspace;
-	_ret[1]=newsize-3*sizeof(size_t);
-#endif
-	return ret;
-}
-
-static FORCEINLINE void CallFree(void *RESTRICT mspace, void *RESTRICT mem, int isforeign) THROWSPEC
-{
-#if USE_MAGIC_HEADERS
-	mstate oldmspace=0;
-	size_t *_mem=(size_t *) mem-3, oldsize=0;
-#endif
-	if(isforeign)
-	{
-#if USE_MAGIC_HEADERS
-		assert(_mem[0]!=*(size_t *) "NEDMALOC");
-#endif
-#if defined(DEBUG)
-		printf("*** nedmalloc frees system allocated block %p\n", mem);
-#endif
-		free(mem);
-		return;
-	}
-#if USE_MAGIC_HEADERS
-	assert(_mem[0]==*(size_t *) "NEDMALOC");
-	oldmspace=(mstate) _mem[1];
-	oldsize=_mem[2];
-	for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
-	mem=(void *)(++_mem);
-#endif
-#if USE_ALLOCATOR==0
-	free(mem);
-#elif USE_ALLOCATOR==1
-	mspace_free((mstate) mspace, mem);
-#endif
-}
-
-static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC
-{
-	if(mem)
-	{
-#if USE_MAGIC_HEADERS
-		size_t *_mem=(size_t *) mem-3;
-		if(_mem[0]==*(size_t *) "NEDMALOC")
-		{
-			return (mstate) _mem[1];
-		}
-		else return 0;
-#else
-#if USE_ALLOCATOR==0
-		/* Fail everything */
-		return 0;
-#elif USE_ALLOCATOR==1
-#ifdef ENABLE_FAST_HEAP_DETECTION
-#ifdef WIN32
-		/*  On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header
-			which looks like:
-				normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ]
-				mmaped: 4 bytes of size  4 bytes of [zero,      zero,      0xb,                        zero        ]
-
-			On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land).
-		*/
-#pragma pack(push, 1)
-		struct _HEAP_ENTRY
-		{
-			USHORT Size;
-			USHORT PreviousSize;
-			UCHAR Cookie;			/* SegmentIndex */
-			UCHAR Flags;			/* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */
-			UCHAR UnusedBytes;
-			UCHAR SmallTagIndex;	/* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */
-		} *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1;
-#pragma pack(pop)
-		unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2;
-		result1=header & mask1;	/* Positive testing for NT heap */
-		result2=header & mask2;	/* Positive testing for dlmalloc */
-		if(result1==0x00000100 && result2!=0x00000102)
-		{	/* This is likely a NT heap block */
-			return 0;
-		}
-#endif
-#ifdef __linux__
-		/* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish
-		when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr
-		down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */
-		mchunkptr p=mem2chunk(mem);
-		mstate fm=get_mstate_for(p);
-		/* If it's a ptmalloc2 block, fm is likely to be some crazy value */
-		if(!is_aligned(fm)) return 0;
-		if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
-		if(ok_magic(fm))
-			return fm;
-		else
-			return 0;
-		if(1) { }
-#endif
-		else
-		{
-			mchunkptr p=mem2chunk(mem);
-			mstate fm=get_mstate_for(p);
-			assert(ok_magic(fm));	/* If this fails, someone tried to free a block twice */
-			if(ok_magic(fm))
-				return fm;
-		}
-#else
-//#ifdef WIN32
-//		__try
-//#endif
-		{
-			/* We try to return zero here if it isn't one of our own blocks, however
-			the current block annotation scheme used by dlmalloc makes it impossible
-			to be absolutely sure of avoiding a segfault.
-
-			mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block;
-			mchunkptr->head      = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS
-			    FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently
-				            in use unless mmap), bit 2 is UNUSED and currently is always zero.
-			*/
-			register void *RESTRICT leastusedaddress_=leastusedaddress;		/* Cache these to avoid register reloading */
-			register size_t largestusedblock_=largestusedblock;
-			if(!is_aligned(mem)) return 0;		/* Would fail very rarely as all allocators return aligned blocks */
-			if(mem<leastusedaddress_) return 0;	/* Simple but effective */
-			{
-				mchunkptr p=mem2chunk(mem);
-				mstate fm=0;
-				int ismmapped=is_mmapped(p);
-				if((!ismmapped && !is_inuse(p)) || (p->head & FLAG4_BIT)) return 0;
-				/* Reduced uncertainty by 0.5^2 = 25.0% */
-				/* size should never exceed largestusedblock */
-				if(chunksize(p)>largestusedblock_) return 0;
-				/* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */
-				/* Having sanity checked prev_foot and head, check next block */
-				if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0;
-				/* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */
-	#if 0
-				/* If previous block is free, check that its next block pointer equals us */
-				if(!ismmapped && !pinuse(p))
-					if(next_chunk(prev_chunk(p))!=p) return 0;
-				/* We could start comparing prev_foot's for similarity but it starts getting slow. */
-	#endif
-				fm = get_mstate_for(p);
-				if(!is_aligned(fm) || (void *)fm<leastusedaddress_) return 0;
-				if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
-				assert(ok_magic(fm));	/* If this fails, someone tried to free a block twice */
-				if(ok_magic(fm))
-					return fm;
-			}
-		}
-//#ifdef WIN32
-//		__except(1) { }
-//#endif
-#endif
-#endif
-#endif
-	}
-	return 0;
-}
-NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC
-{
-	if(mem)
-	{
-		if(isforeign) *isforeign=1;
-#if USE_MAGIC_HEADERS
-		{
-			size_t *_mem=(size_t *) mem-3;
-			if(_mem[0]==*(size_t *) "NEDMALOC")
-			{
-				mstate mspace=(mstate) _mem[1];
-				size_t size=_mem[2];
-				if(isforeign) *isforeign=0;
-				return size;
-			}
-		}
-#elif USE_ALLOCATOR==1
-		if(nedblkmstate(mem))
-		{
-			mchunkptr p=mem2chunk(mem);
-			if(isforeign) *isforeign=0;
-			return chunksize(p)-overhead_for(p);
-		}
-#ifdef DEBUG
-		else
-		{
-			int a=1; /* Set breakpoints here if needed */
-		}
-#endif
-#endif
-#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0
-#ifdef _MSC_VER
-		/* This is the MSVCRT equivalent */
-		return _msize(mem);
-#elif defined(__linux__)
-		/* This is the glibc/ptmalloc2/dlmalloc equivalent.  */
-		return malloc_usable_size(mem);
-#elif defined(__FreeBSD__) || defined(__APPLE__)
-		/* This is the BSD libc equivalent.  */
-		return malloc_size(mem);
-#else
-#error Cannot tolerate the memory allocator of an unknown system!
-#endif
-#endif
-	}
-	return 0;
-}
-
-NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC											{ nedpsetvalue((nedpool *) 0, v); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC						{ return nedpmalloc((nedpool *) 0, size); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC			{ return nedpcalloc((nedpool *) 0, no, size); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC			{ return nedprealloc((nedpool *) 0, mem, size); }
-NEDMALLOCNOALIASATTR void   nedfree(void *mem) THROWSPEC											{ nedpfree((nedpool *) 0, mem); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC	{ return nedpmemalign((nedpool *) 0, alignment, bytes); }
-NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC									{ return nedpmallinfo((nedpool *) 0); }
-NEDMALLOCNOALIASATTR int    nedmallopt(int parno, int value) THROWSPEC								{ return nedpmallopt((nedpool *) 0, parno, value); }
-NEDMALLOCNOALIASATTR int    nedmalloc_trim(size_t pad) THROWSPEC									{ return nedpmalloc_trim((nedpool *) 0, pad); }
-void   nedmalloc_stats() THROWSPEC																	{ nedpmalloc_stats((nedpool *) 0); }
-NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC											{ return nedpmalloc_footprint((nedpool *) 0); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC	{ return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC		{ return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); }
-
-struct threadcacheblk_t;
-typedef struct threadcacheblk_t threadcacheblk;
-struct threadcacheblk_t
-{	/* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
-#ifdef FULLSANITYCHECKS
-	unsigned int magic;
-#endif
-	unsigned int lastUsed, size;
-	threadcacheblk *next, *prev;
-};
-typedef struct threadcache_t
-{
-#ifdef FULLSANITYCHECKS
-	unsigned int magic1;
-#endif
-	int mymspace;						/* Last mspace entry this thread used */
-	long threadid;
-	unsigned int mallocs, frees, successes;
-	size_t freeInCache;					/* How much free space is stored in this cache */
-	threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
-#ifdef FULLSANITYCHECKS
-	unsigned int magic2;
-#endif
-} threadcache;
-struct nedpool_t
-{
-	MLOCK_T mutex;
-	void *uservalue;
-	int threads;						/* Max entries in m to use */
-	threadcache *caches[THREADCACHEMAXCACHES];
-	TLSVAR mycache;						/* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
-	mstate m[MAXTHREADSINPOOL+1];		/* mspace entries for this pool */
-};
-static nedpool syspool;
-
-static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
-{	/* 8=1000	16=10000	20=10100	24=11000	32=100000	48=110000	4096=1000000000000 */
-	unsigned int topbit, size=(unsigned int)(_size>>4);
-	/* 16=1		20=1	24=1	32=10	48=11	64=100	96=110	128=1000	4096=100000000 */
-
-#if defined(__GNUC__)
-        topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-	{
-            unsigned long bsrTopBit;
-
-            _BitScanReverse(&bsrTopBit, size);
-
-            topbit = bsrTopBit;
-        }
-#else
-#if 0
-	union {
-		unsigned asInt[2];
-		double asDouble;
-	};
-	int n;
-
-	asDouble = (double)size + 0.5;
-	topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
-#else
-	{
-		unsigned int x=size;
-		x = x | (x >> 1);
-		x = x | (x >> 2);
-		x = x | (x >> 4);
-		x = x | (x >> 8);
-		x = x | (x >>16);
-		x = ~x;
-		x = x - ((x >> 1) & 0x55555555);
-		x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-		x = (x + (x >> 4)) & 0x0F0F0F0F;
-		x = x + (x << 8);
-		x = x + (x << 16);
-		topbit=31 - (x >> 24);
-	}
-#endif
-#endif
-	return topbit;
-}
-
-
-#ifdef FULLSANITYCHECKS
-static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
-{
-	assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
-	if(ptr[0] && ptr[1])
-	{
-		assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
-		assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
-		assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
-		assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
-		assert(!ptr[0]->prev);
-		assert(!ptr[1]->next);
-		if(ptr[0]==ptr[1])
-		{
-			assert(!ptr[0]->next);
-			assert(!ptr[1]->prev);
-		}
-	}
-}
-static void tcfullsanitycheck(threadcache *tc) THROWSPEC
-{
-	threadcacheblk **tcbptr=tc->bins;
-	int n;
-	for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
-	{
-		threadcacheblk *b, *ob=0;
-		tcsanitycheck(tcbptr);
-		for(b=tcbptr[0]; b; ob=b, b=b->next)
-		{
-			assert(*(unsigned int *) "NEDN"==b->magic);
-			assert(!ob || ob->next==b);
-			assert(!ob || b->prev==ob);
-		}
-	}
-}
-#endif
-
-static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
-{
-#ifdef FULLSANITYCHECKS
-	tcfullsanitycheck(tc);
-#endif
-	if(tc->freeInCache)
-	{
-		threadcacheblk **tcbptr=tc->bins;
-		int n;
-		for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
-		{
-			threadcacheblk **tcb=tcbptr+1;		/* come from oldest end of list */
-			/*tcsanitycheck(tcbptr);*/
-			for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
-			{
-				threadcacheblk *f=*tcb;
-				size_t blksize=f->size; /*nedblksize(f);*/
-				assert(blksize<=nedblksize(0, f));
-				assert(blksize);
-#ifdef FULLSANITYCHECKS
-				assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
-#endif
-				*tcb=(*tcb)->prev;
-				if(*tcb)
-					(*tcb)->next=0;
-				else
-					*tcbptr=0;
-				tc->freeInCache-=blksize;
-				assert((long) tc->freeInCache>=0);
-				CallFree(0, f, 0);
-				/*tcsanitycheck(tcbptr);*/
-			}
-		}
-	}
-#ifdef FULLSANITYCHECKS
-	tcfullsanitycheck(tc);
-#endif
-}
-static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
-{
-	if(p->caches)
-	{
-		threadcache *tc;
-		int n;
-		for(n=0; n<THREADCACHEMAXCACHES; n++)
-		{
-			if((tc=p->caches[n]))
-			{
-				tc->frees++;
-				RemoveCacheEntries(p, tc, 0);
-				assert(!tc->freeInCache);
-				tc->mymspace=-1;
-				tc->threadid=0;
-				CallFree(0, tc, 0);
-				p->caches[n]=0;
-			}
-		}
-	}
-}
-
-static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
-{
-	threadcache *tc=0;
-	int n, end;
-	ACQUIRE_LOCK(&p->mutex);
-	for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
-	if(THREADCACHEMAXCACHES==n)
-	{	/* List exhausted, so disable for this thread */
-		RELEASE_LOCK(&p->mutex);
-		return 0;
-	}
-	tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
-	if(!tc)
-	{
-		RELEASE_LOCK(&p->mutex);
-		return 0;
-	}
-#ifdef FULLSANITYCHECKS
-	tc->magic1=*(unsigned int *)"NEDMALC1";
-	tc->magic2=*(unsigned int *)"NEDMALC2";
-#endif
-	tc->threadid=(long)(size_t)CURRENT_THREAD;
-	for(end=0; p->m[end]; end++);
-	tc->mymspace=abs(tc->threadid) % end;
-	RELEASE_LOCK(&p->mutex);
-	if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
-	return tc;
-}
-
-static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
-{
-	void *RESTRICT ret=0;
-	size_t size=*_size, blksize=0;
-	unsigned int bestsize;
-	unsigned int idx=size2binidx(size);
-	threadcacheblk *RESTRICT blk, **RESTRICT binsptr;
-#ifdef FULLSANITYCHECKS
-	tcfullsanitycheck(tc);
-#endif
-	/* Calculate best fit bin size */
-	bestsize=1<<(idx+4);
-#if 0
-	/* Finer grained bin fit */
-	idx<<=1;
-	if(size>bestsize)
-	{
-		idx++;
-		bestsize+=bestsize>>1;
-	}
-	if(size>bestsize)
-	{
-		idx++;
-		bestsize=1<<(4+(idx>>1));
-	}
-#else
-	if(size>bestsize)
-	{
-		idx++;
-		bestsize<<=1;
-	}
-#endif
-	assert(bestsize>=size);
-	if(size<bestsize) size=bestsize;
-	assert(size<=THREADCACHEMAX);
-	assert(idx<=THREADCACHEMAXBINS);
-	binsptr=&tc->bins[idx*2];
-	/* Try to match close, but move up a bin if necessary */
-	blk=*binsptr;
-	if(!blk || blk->size<size)
-	{	/* Bump it up a bin */
-		if(idx<THREADCACHEMAXBINS)
-		{
-			idx++;
-			binsptr+=2;
-			blk=*binsptr;
-		}
-	}
-	if(blk)
-	{
-		blksize=blk->size; /*nedblksize(blk);*/
-		assert(nedblksize(0, blk)>=blksize);
-		assert(blksize>=size);
-		if(blk->next)
-			blk->next->prev=0;
-		*binsptr=blk->next;
-		if(!*binsptr)
-			binsptr[1]=0;
-#ifdef FULLSANITYCHECKS
-		blk->magic=0;
-#endif
-		assert(binsptr[0]!=blk && binsptr[1]!=blk);
-		assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
-		/*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/
-		ret=(void *) blk;
-	}
-	++tc->mallocs;
-	if(ret)
-	{
-		assert(blksize>=size);
-		++tc->successes;
-		tc->freeInCache-=blksize;
-		assert((long) tc->freeInCache>=0);
-	}
-#if defined(DEBUG) && 0
-	if(!(tc->mallocs & 0xfff))
-	{
-		printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
-			(float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
-	}
-#endif
-#ifdef FULLSANITYCHECKS
-	tcfullsanitycheck(tc);
-#endif
-	*_size=size;
-	return ret;
-}
-static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
-{
-	unsigned int age=THREADCACHEMAXFREESPACE/8192;
-	/*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
-	while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
-	{
-		RemoveCacheEntries(p, tc, age);
-		/*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
-		age>>=1;
-	}
-	/*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
-}
-static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
-{
-	unsigned int bestsize;
-	unsigned int idx=size2binidx(size);
-	threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem;
-	assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
-#ifdef DEBUG
-	/* Make sure this is a valid memory block */
-	assert(nedblksize(0, mem));
-#endif
-#ifdef FULLSANITYCHECKS
-	tcfullsanitycheck(tc);
-#endif
-	/* Calculate best fit bin size */
-	bestsize=1<<(idx+4);
-#if 0
-	/* Finer grained bin fit */
-	idx<<=1;
-	if(size>bestsize)
-	{
-		unsigned int biggerbestsize=bestsize+bestsize<<1;
-		if(size>=biggerbestsize)
-		{
-			idx++;
-			bestsize=biggerbestsize;
-		}
-	}
-#endif
-	if(bestsize!=size)	/* dlmalloc can round up, so we round down to preserve indexing */
-		size=bestsize;
-	binsptr=&tc->bins[idx*2];
-	assert(idx<=THREADCACHEMAXBINS);
-	if(tck==*binsptr)
-	{
-		fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck);
-		abort();
-	}
-#ifdef FULLSANITYCHECKS
-	tck->magic=*(unsigned int *) "NEDN";
-#endif
-	tck->lastUsed=++tc->frees;
-	tck->size=(unsigned int) size;
-	tck->next=*binsptr;
-	tck->prev=0;
-	if(tck->next)
-		tck->next->prev=tck;
-	else
-		binsptr[1]=tck;
-	assert(!*binsptr || (*binsptr)->size==tck->size);
-	*binsptr=tck;
-	assert(tck==tc->bins[idx*2]);
-	assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
-	/*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
-	tc->freeInCache+=size;
-#ifdef FULLSANITYCHECKS
-	tcfullsanitycheck(tc);
-#endif
-#if 1
-	if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
-		ReleaseFreeInCache(p, tc, mymspace);
-#endif
-}
-
-
-
-
-static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC
-{	/* threads is -1 for system pool */
-	ensure_initialization();
-	ACQUIRE_MALLOC_GLOBAL_LOCK();
-	if(p->threads) goto done;
-	if(INITIAL_LOCK(&p->mutex)) goto err;
-	if(TLSALLOC(&p->mycache)) goto err;
-#if USE_ALLOCATOR==0
-	p->m[0]=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
-	if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
-	p->m[0]->extp=p;
-#endif
-	p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
-done:
-	RELEASE_MALLOC_GLOBAL_LOCK();
-	return 1;
-err:
-	if(threads<0)
-		abort();			/* If you can't allocate for system pool, we're screwed */
-	DestroyCaches(p);
-	if(p->m[0])
-	{
-#if USE_ALLOCATOR==1
-		destroy_mspace(p->m[0]);
-#endif
-		p->m[0]=0;
-	}
-	if(p->mycache)
-	{
-		if(TLSFREE(p->mycache)) abort();
-		p->mycache=0;
-	}
-	RELEASE_MALLOC_GLOBAL_LOCK();
-	return 0;
-}
-static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC
-{	/* Gets called when thread's last used mspace is in use. The strategy
-	is to run through the list of all available mspaces looking for an
-	unlocked one and if we fail, we create a new one so long as we don't
-	exceed p->threads */
-	int n, end;
-	for(n=end=*lastUsed+1; p->m[n]; end=++n)
-	{
-		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
-	}
-	for(n=0; n<*lastUsed && p->m[n]; n++)
-	{
-		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
-	}
-	if(end<p->threads)
-	{
-		mstate temp;
-#if USE_ALLOCATOR==0
-		temp=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
-		if(!(temp=(mstate) create_mspace(size, 1)))
-			goto badexit;
-#endif
-		/* Now we're ready to modify the lists, we lock */
-		ACQUIRE_LOCK(&p->mutex);
-		while(p->m[end] && end<p->threads)
-			end++;
-		if(end>=p->threads)
-		{	/* Drat, must destroy it now */
-			RELEASE_LOCK(&p->mutex);
-#if USE_ALLOCATOR==1
-			destroy_mspace((mstate) temp);
-#endif
-			goto badexit;
-		}
-		/* We really want to make sure this goes into memory now but we
-		have to be careful of breaking aliasing rules, so write it twice */
-		*((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
-		ACQUIRE_LOCK(&p->m[end]->mutex);
-		/*printf("Created mspace idx %d\n", end);*/
-		RELEASE_LOCK(&p->mutex);
-		n=end;
-		goto found;
-	}
-	/* Let it lock on the last one it used */
-badexit:
-	ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
-	return p->m[*lastUsed];
-found:
-	*lastUsed=n;
-	if(tc)
-		tc->mymspace=n;
-	else
-	{
-		if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
-	}
-	return p->m[n];
-}
-
-typedef struct PoolList_t
-{
-	size_t size;			/* Size of list */
-	size_t length;			/* Actual entries in list */
-#ifdef DEBUG
-	nedpool *list[1];		/* Force testing of list expansion */
-#else
-	nedpool *list[16];
-#endif
-} PoolList;
-static MLOCK_T poollistlock;
-static PoolList *poollist;
-NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
-{
-	nedpool *ret=0;
-	if(!poollist)
-	{
-		PoolList *newpoollist=0;
-		if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
-		INITIAL_LOCK(&poollistlock);
-		ACQUIRE_LOCK(&poollistlock);
-		poollist=newpoollist;
-		poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
-	}
-	else
-		ACQUIRE_LOCK(&poollistlock);
-	if(poollist->length==poollist->size)
-	{
-		PoolList *newpoollist=0;
-		size_t newsize=0;
-		newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
-		if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
-		poollist=newpoollist;
-		memset(&poollist->list[poollist->size], 0, newsize-((size_t)&poollist->list[poollist->size]-(size_t)&poollist->list[0]));
-		poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
-		assert(poollist->size>poollist->length);
-	}
-	if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
-	if(!InitPool(ret, capacity, threads))
-	{
-		nedpfree(0, ret);
-		goto badexit;
-	}
-	poollist->list[poollist->length++]=ret;
-badexit:
-	RELEASE_LOCK(&poollistlock);
-	return ret;
-}
-void neddestroypool(nedpool *p) THROWSPEC
-{
-	unsigned int n;
-	ACQUIRE_LOCK(&p->mutex);
-	DestroyCaches(p);
-	for(n=0; p->m[n]; n++)
-	{
-#if USE_ALLOCATOR==1
-		destroy_mspace(p->m[n]);
-#endif
-		p->m[n]=0;
-	}
-	RELEASE_LOCK(&p->mutex);
-	if(TLSFREE(p->mycache)) abort();
-	nedpfree(0, p);
-	ACQUIRE_LOCK(&poollistlock);
-	assert(poollist);
-	for(n=0; n<poollist->length && poollist->list[n]!=p; n++);
-	assert(n!=poollist->length);
-	memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]);
-	if(!--poollist->length)
-	{
-		assert(!poollist->list[0]);
-		nedpfree(0, poollist);
-		poollist=0;
-	}
-	RELEASE_LOCK(&poollistlock);
-}
-void neddestroysyspool() THROWSPEC
-{
-	nedpool *p=&syspool;
-	int n;
-	ACQUIRE_LOCK(&p->mutex);
-	DestroyCaches(p);
-	for(n=0; p->m[n]; n++)
-	{
-#if USE_ALLOCATOR==1
-		destroy_mspace(p->m[n]);
-#endif
-		p->m[n]=0;
-	}
-	/* Render syspool unusable */
-	for(n=0; n<THREADCACHEMAXCACHES; n++)
-		p->caches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
-	for(n=0; n<MAXTHREADSINPOOL+1; n++)
-		p->m[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
-	if(TLSFREE(p->mycache)) abort();
-	RELEASE_LOCK(&p->mutex);
-}
-nedpool **nedpoollist() THROWSPEC
-{
-	nedpool **ret=0;
-	if(poollist)
-	{
-		ACQUIRE_LOCK(&poollistlock);
-		if(!(ret=(nedpool **) nedmalloc((poollist->length+1)*sizeof(nedpool *)))) goto badexit;
-		memcpy(ret, poollist->list, (poollist->length+1)*sizeof(nedpool *));
-badexit:
-		RELEASE_LOCK(&poollistlock);
-	}
-	return ret;
-}
-
-void nedpsetvalue(nedpool *p, void *v) THROWSPEC
-{
-	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-	p->uservalue=v;
-}
-void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
-{
-	nedpool *np=0;
-	mstate fm=nedblkmstate(mem);
-	if(!fm || !fm->extp) return 0;
-	np=(nedpool *) fm->extp;
-	if(p) *p=np;
-	return np->uservalue;
-}
-
-void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
-{
-	int mycache;
-	if(!p)
-	{
-		p=&syspool;
-		if(!syspool.threads) InitPool(&syspool, 0, -1);
-	}
-	mycache=(int)(size_t) TLSGET(p->mycache);
-	if(!mycache)
-	{	/* Set to mspace 0 */
-		if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
-	}
-	else if(mycache>0)
-	{	/* Set to last used mspace */
-		threadcache *tc=p->caches[mycache-1];
-#if defined(DEBUG)
-		printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
-			100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
-#endif
-		if(disable && TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
-		tc->frees++;
-		RemoveCacheEntries(p, tc, 0);
-		assert(!tc->freeInCache);
-		if(disable)
-		{
-			tc->mymspace=-1;
-			tc->threadid=0;
-			CallFree(0, p->caches[mycache-1], 0);
-			p->caches[mycache-1]=0;
-		}
-	}
-}
-void neddisablethreadcache(nedpool *p) THROWSPEC
-{
-	nedtrimthreadcache(p, 1);
-}
-
-#define GETMSPACE(m,p,tc,ms,s,action)                 \
-  do                                                  \
-  {                                                   \
-    mstate m = GetMSpace((p),(tc),(ms),(s));          \
-    action;                                           \
-	if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \
-  } while (0)
-
-static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
-{	/* Returns a locked and ready for use mspace */
-	mstate m=p->m[mymspace];
-	assert(m);
-#if USE_ALLOCATOR==1
-	if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
-	/*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
-#endif
-	return m;
-}
-static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
-{
-	*p=&syspool;
-	if(!syspool.threads) InitPool(&syspool, 0, -1);
-}
-static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
-{
-	if(!mycache)
-	{	/* Need to allocate a new cache */
-		*tc=AllocCache(*p);
-		if(!*tc)
-		{	/* Disable */
-			if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
-			*mymspace=0;
-		}
-		else
-			*mymspace=(*tc)->mymspace;
-	}
-	else
-	{	/* Cache disabled, but we do have an assigned thread pool */
-		*tc=0;
-		*mymspace=-mycache-1;
-	}
-}
-static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
-{
-	int mycache;
-	if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
-	if(!*p)
-		GetThreadCache_cold1(p);
-	mycache=(int)(size_t) TLSGET((*p)->mycache);
-	if(mycache>0)
-	{	/* Already have a cache */
-		*tc=(*p)->caches[mycache-1];
-		*mymspace=(*tc)->mymspace;
-	}
-	else GetThreadCache_cold2(p, tc, mymspace, mycache);
-	assert(*mymspace>=0);
-	assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
-#ifdef FULLSANITYCHECKS
-	if(*tc)
-	{
-		if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
-		{
-			abort();
-		}
-	}
-#endif
-}
-
-NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
-{
-	void *ret=0;
-	threadcache *tc;
-	int mymspace;
-	GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
-	if(tc && size<=THREADCACHEMAX)
-	{	/* Use the thread cache */
-		ret=threadcache_malloc(p, tc, &size);
-	}
-#endif
-	if(!ret)
-	{	/* Use this thread's mspace */
-        GETMSPACE(m, p, tc, mymspace, size,
-                  ret=CallMalloc(m, size, 0));
-	}
-	return ret;
-}
-NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
-{
-	size_t rsize=size*no;
-	void *ret=0;
-	threadcache *tc;
-	int mymspace;
-	GetThreadCache(&p, &tc, &mymspace, &rsize);
-#if THREADCACHEMAX
-	if(tc && rsize<=THREADCACHEMAX)
-	{	/* Use the thread cache */
-		if((ret=threadcache_malloc(p, tc, &rsize)))
-			memset(ret, 0, rsize);
-	}
-#endif
-	if(!ret)
-	{	/* Use this thread's mspace */
-        GETMSPACE(m, p, tc, mymspace, rsize,
-                  ret=CallCalloc(m, rsize, 0));
-	}
-	return ret;
-}
-NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
-{
-	void *ret=0;
-	threadcache *tc;
-	int mymspace, isforeign=1;
-	size_t memsize;
-	if(!mem) return nedpmalloc(p, size);
-	memsize=nedblksize(&isforeign, mem);
-	assert(memsize);
-	if(!memsize)
-	{
-		fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
-		abort();
-	}
-	else if(size<=memsize && memsize-size<
-#ifdef DEBUG
-		32
-#else
-		1024
-#endif
-		)		/* If realloc size is within 1Kb smaller than existing, noop it */
-		return mem;
-	GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
-	if(tc && size && size<=THREADCACHEMAX)
-	{	/* Use the thread cache */
-		if((ret=threadcache_malloc(p, tc, &size)))
-		{
-			memcpy(ret, mem, memsize<size ? memsize : size);
-			if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
-				threadcache_free(p, tc, mymspace, mem, memsize);
-			else
-				CallFree(0, mem, isforeign);
-		}
-	}
-#endif
-	if(!ret)
-	{	/* Reallocs always happen in the mspace they happened in, so skip
-		locking the preferred mspace for this thread */
-		ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
-	}
-	return ret;
-}
-void   nedpfree(nedpool *p, void *mem) THROWSPEC
-{	/* Frees always happen in the mspace they happened in, so skip
-	locking the preferred mspace for this thread */
-	threadcache *tc;
-	int mymspace, isforeign=1;
-	size_t memsize;
-	if(!mem)
-	{	/* If you tried this on FreeBSD you'd be sorry! */
-#ifdef DEBUG
-		fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
-#endif
-		return;
-	}
-	memsize=nedblksize(&isforeign, mem);
-	assert(memsize);
-	if(!memsize)
-	{
-		fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
-		abort();
-	}
-	GetThreadCache(&p, &tc, &mymspace, 0);
-#if THREADCACHEMAX
-	if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
-		threadcache_free(p, tc, mymspace, mem, memsize);
-	else
-#endif
-		CallFree(0, mem, isforeign);
-}
-NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
-{
-	void *ret;
-	threadcache *tc;
-	int mymspace;
-	GetThreadCache(&p, &tc, &mymspace, &bytes);
-	{	/* Use this thread's mspace */
-        GETMSPACE(m, p, tc, mymspace, bytes,
-                  ret=CallMalloc(m, bytes, alignment));
-	}
-	return ret;
-}
-struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
-{
-	int n;
-	struct nedmallinfo ret={0};
-	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-	for(n=0; p->m[n]; n++)
-	{
-#if USE_ALLOCATOR==1 && !NO_MALLINFO
-		struct mallinfo t=mspace_mallinfo(p->m[n]);
-		ret.arena+=t.arena;
-		ret.ordblks+=t.ordblks;
-		ret.hblkhd+=t.hblkhd;
-		ret.usmblks+=t.usmblks;
-		ret.uordblks+=t.uordblks;
-		ret.fordblks+=t.fordblks;
-		ret.keepcost+=t.keepcost;
-#endif
-	}
-	return ret;
-}
-int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
-{
-#if USE_ALLOCATOR==1
-	return mspace_mallopt(parno, value);
-#else
-	return 0;
-#endif
-}
-NEDMALLOCNOALIASATTR void*  nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
-{
-#if USE_ALLOCATOR==1
-	if(granularity) *granularity=mparams.granularity;
-	if(magic) *magic=mparams.magic;
-	return (void *) &syspool;
-#else
-	if(granularity) *granularity=0;
-	if(magic) *magic=0;
-	return 0;
-#endif
-}
-int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
-{
-	int n, ret=0;
-	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-	for(n=0; p->m[n]; n++)
-	{
-#if USE_ALLOCATOR==1
-		ret+=mspace_trim(p->m[n], pad);
-#endif
-	}
-	return ret;
-}
-void   nedpmalloc_stats(nedpool *p) THROWSPEC
-{
-	int n;
-	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-	for(n=0; p->m[n]; n++)
-	{
-#if USE_ALLOCATOR==1
-		mspace_malloc_stats(p->m[n]);
-#endif
-	}
-}
-size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
-{
-	size_t ret=0;
-	int n;
-	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
-	for(n=0; p->m[n]; n++)
-	{
-#if USE_ALLOCATOR==1
-		ret+=mspace_footprint(p->m[n]);
-#endif
-	}
-	return ret;
-}
-NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
-{
-	void **ret;
-	threadcache *tc;
-	int mymspace;
-	GetThreadCache(&p, &tc, &mymspace, &elemsize);
-#if USE_ALLOCATOR==0
-    GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
-              ret=unsupported_operation("independent_calloc"));
-#elif USE_ALLOCATOR==1
-    GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
-              ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
-#endif
-	return ret;
-}
-NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
-{
-	void **ret;
-	threadcache *tc;
-	int mymspace;
-    size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
-    if(!adjustedsizes) return 0;
-    for(i=0; i<elems; i++)
-        adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
-	GetThreadCache(&p, &tc, &mymspace, 0);
-#if USE_ALLOCATOR==0
-	GETMSPACE(m, p, tc, mymspace, 0,
-              ret=unsupported_operation("independent_comalloc"));
-#elif USE_ALLOCATOR==1
-	GETMSPACE(m, p, tc, mymspace, 0,
-              ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
-#endif
-	return ret;
-}
-
-#if defined(__cplusplus)
-}
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif
+#ifdef NEDMALLOC_ENABLED
+/* Alternative malloc implementation for multiple threads without
+lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+#ifdef _MSC_VER
+/* Enable full aliasing on MSVC */
+/*#pragma optimize("a", on)*/
+#pragma warning(push)
+#pragma warning(disable:4100)	/* unreferenced formal parameter */
+#pragma warning(disable:4127)	/* conditional expression is constant */
+#pragma warning(disable:4706)	/* assignment within conditional expression */
+#endif
+
+/*#define ENABLE_TOLERANT_NEDMALLOC 1*/
+/*#define ENABLE_FAST_HEAP_DETECTION 1*/
+/*#define NEDMALLOC_DEBUG 1*/
+
+/*#define FULLSANITYCHECKS*/
+/* If link time code generation is on, don't force or prevent inlining */
+#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS)
+#define FORCEINLINE
+#define NOINLINE
+#endif
+
+
+#include "nedmalloc.h"
+#ifdef WIN32
+ #include <malloc.h>
+ #include <stddef.h>
+#endif
+#if USE_ALLOCATOR==1
+ #define MSPACES 1
+ #define ONLY_MSPACES 1
+#endif
+#define USE_DL_PREFIX 1
+#ifndef USE_LOCKS
+ #define USE_LOCKS 1
+#endif
+#define FOOTERS 1           /* Need to enable footers so frees lock the right mspace */
+#ifndef NEDMALLOC_DEBUG
+ #if defined(DEBUG) || defined(_DEBUG)
+  #define NEDMALLOC_DEBUG 1
+ #else
+  #define NEDMALLOC_DEBUG 0
+ #endif
+#endif
+/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */
+#undef DEBUG
+#undef _DEBUG
+#if NEDMALLOC_DEBUG
+ #define _DEBUG
+ #define DEBUG 1
+#else
+ #define DEBUG 0
+#endif
+#ifdef NDEBUG               /* Disable assert checking on release builds */
+ #undef DEBUG
+ #undef _DEBUG
+#endif
+/* The default of 64Kb means we spend too much time kernel-side */
+#ifndef DEFAULT_GRANULARITY
+#define DEFAULT_GRANULARITY (1*1024*1024)
+#if DEBUG
+#define DEFAULT_GRANULARITY_ALIGNED
+#endif
+#endif
+/*#define USE_SPIN_LOCKS 0*/
+
+
+#include "malloc.c.h"
+#ifdef NDEBUG               /* Disable assert checking on release builds */
+ #undef DEBUG
+#elif !NEDMALLOC_DEBUG
+ #ifdef __GNUC__
+  #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.
+ #elif defined(_MSC_VER)
+  #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.")
+ #endif
+#endif
+
+/* The maximum concurrent threads in a pool possible */
+#ifndef MAXTHREADSINPOOL
+#define MAXTHREADSINPOOL 16
+#endif
+/* The maximum number of threadcaches which can be allocated */
+#ifndef THREADCACHEMAXCACHES
+#define THREADCACHEMAXCACHES 256
+#endif
+/* The maximum size to be allocated from the thread cache */
+#ifndef THREADCACHEMAX
+#define THREADCACHEMAX 8192
+#endif
+#if 0
+/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
+#define THREADCACHEMAXBINS ((13-4)*2)
+#else
+/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
+#define THREADCACHEMAXBINS (13-4)
+#endif
+/* Point at which the free space in a thread cache is garbage collected */
+#ifndef THREADCACHEMAXFREESPACE
+#define THREADCACHEMAXFREESPACE (512*1024)
+#endif
+
+
+#ifdef WIN32
+ #define TLSVAR			DWORD
+ #define TLSALLOC(k)	(*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
+ #define TLSFREE(k)		(!TlsFree(k))
+ #define TLSGET(k)		TlsGetValue(k)
+ #define TLSSET(k, a)	(!TlsSetValue(k, a))
+ #ifdef DEBUG
+static LPVOID ChkedTlsGetValue(DWORD idx)
+{
+	LPVOID ret=TlsGetValue(idx);
+	assert(S_OK==GetLastError());
+	return ret;
+}
+  #undef TLSGET
+  #define TLSGET(k) ChkedTlsGetValue(k)
+ #endif
+#else
+ #define TLSVAR			pthread_key_t
+ #define TLSALLOC(k)	pthread_key_create(k, 0)
+ #define TLSFREE(k)		pthread_key_delete(k)
+ #define TLSGET(k)		pthread_getspecific(k)
+ #define TLSSET(k, a)	pthread_setspecific(k, a)
+#endif
+
+#if defined(__cplusplus)
+#if !defined(NO_NED_NAMESPACE)
+namespace nedalloc {
+#else
+extern "C" {
+#endif
+#endif
+
+#if USE_ALLOCATOR==0
+static void *unsupported_operation(const char *opname) THROWSPEC
+{
+	fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname);
+	abort();
+	return 0;
+}
+static size_t mspacecounter=(size_t) 0xdeadbeef;
+#endif
+#ifndef ENABLE_FAST_HEAP_DETECTION
+static void *RESTRICT leastusedaddress;
+static size_t largestusedblock;
+#endif
+
+static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
+{
+	void *RESTRICT ret=0;
+	size_t _alignment=alignment;
+#if USE_MAGIC_HEADERS
+	size_t *_ret=0;
+	size+=alignment+3*sizeof(size_t);
+	_alignment=0;
+#endif
+#if USE_ALLOCATOR==0
+	ret=_alignment ? 
+#ifdef _MSC_VER
+	/* This is the MSVCRT equivalent */
+		_aligned_malloc(size, _alignment)
+#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
+	/* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent.  */
+		memalign(_alignment, size)
+#else
+#error Cannot aligned allocate with the memory allocator of an unknown system!
+#endif
+		: malloc(size);
+#elif USE_ALLOCATOR==1
+	ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size);
+#ifndef ENABLE_FAST_HEAP_DETECTION
+	if(ret)
+	{
+		size_t truesize=chunksize(mem2chunk(ret));
+		if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
+		if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
+	}
+#endif
+#endif
+	if(!ret) return 0;
+#if USE_MAGIC_HEADERS
+	_ret=(size_t *) ret;
+	ret=(void *)(_ret+3);
+	if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
+	for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC";
+	_ret[0]=(size_t) mspace;
+	_ret[1]=size-3*sizeof(size_t);
+#endif
+	return ret;
+}
+
+static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
+{
+	void *RESTRICT ret=0;
+#if USE_MAGIC_HEADERS
+	size_t *_ret=0;
+	size+=alignment+3*sizeof(size_t);
+#endif
+#if USE_ALLOCATOR==0
+	ret=calloc(1, size);
+#elif USE_ALLOCATOR==1
+	ret=mspace_calloc((mstate) mspace, 1, size);
+#ifndef ENABLE_FAST_HEAP_DETECTION
+	if(ret)
+	{
+		size_t truesize=chunksize(mem2chunk(ret));
+		if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
+		if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
+	}
+#endif
+#endif
+	if(!ret) return 0;
+#if USE_MAGIC_HEADERS
+	_ret=(size_t *) ret;
+	ret=(void *)(_ret+3);
+	if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
+	for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
+	_ret[0]=(size_t) mspace;
+	_ret[1]=size-3*sizeof(size_t);
+#endif
+	return ret;
+}
+
+static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC
+{
+	void *RESTRICT ret=0;
+#if USE_MAGIC_HEADERS
+	mstate oldmspace=0;
+	size_t *_ret=0, *_mem=(size_t *) mem-3;
+#endif
+	if(isforeign)
+	{	/* Transfer */
+#if USE_MAGIC_HEADERS
+		assert(_mem[0]!=*(size_t *) "NEDMALOC");
+#endif
+		if((ret=CallMalloc(mspace, newsize, 0)))
+		{
+#if defined(DEBUG)
+			printf("*** nedmalloc frees system allocated block %p\n", mem);
+#endif
+			memcpy(ret, mem, oldsize<newsize ? oldsize : newsize);
+			free(mem);
+		}
+		return ret;
+	}
+#if USE_MAGIC_HEADERS
+	assert(_mem[0]==*(size_t *) "NEDMALOC");
+	newsize+=3*sizeof(size_t);
+	oldmspace=(mstate) _mem[1];
+	assert(oldsize>=_mem[2]);
+	for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
+	mem=(void *)(++_mem);
+#endif
+#if USE_ALLOCATOR==0
+	ret=realloc(mem, newsize);
+#elif USE_ALLOCATOR==1
+	ret=mspace_realloc((mstate) mspace, mem, newsize);
+#ifndef ENABLE_FAST_HEAP_DETECTION
+	if(ret)
+	{
+		size_t truesize=chunksize(mem2chunk(ret));
+		if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
+	}
+#endif
+#endif
+	if(!ret)
+	{	/* Put it back the way it was */
+#if USE_MAGIC_HEADERS
+		for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC");
+#endif
+		return 0;
+	}
+#if USE_MAGIC_HEADERS
+	_ret=(size_t *) ret;
+	ret=(void *)(_ret+3);
+	for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
+	_ret[0]=(size_t) mspace;
+	_ret[1]=newsize-3*sizeof(size_t);
+#endif
+	return ret;
+}
+
+static FORCEINLINE void CallFree(void *RESTRICT mspace, void *RESTRICT mem, int isforeign) THROWSPEC
+{
+#if USE_MAGIC_HEADERS
+	mstate oldmspace=0;
+	size_t *_mem=(size_t *) mem-3, oldsize=0;
+#endif
+	if(isforeign)
+	{
+#if USE_MAGIC_HEADERS
+		assert(_mem[0]!=*(size_t *) "NEDMALOC");
+#endif
+#if defined(DEBUG)
+		printf("*** nedmalloc frees system allocated block %p\n", mem);
+#endif
+		free(mem);
+		return;
+	}
+#if USE_MAGIC_HEADERS
+	assert(_mem[0]==*(size_t *) "NEDMALOC");
+	oldmspace=(mstate) _mem[1];
+	oldsize=_mem[2];
+	for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
+	mem=(void *)(++_mem);
+#endif
+#if USE_ALLOCATOR==0
+	free(mem);
+#elif USE_ALLOCATOR==1
+	mspace_free((mstate) mspace, mem);
+#endif
+}
+
+static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC
+{
+	if(mem)
+	{
+#if USE_MAGIC_HEADERS
+		size_t *_mem=(size_t *) mem-3;
+		if(_mem[0]==*(size_t *) "NEDMALOC")
+		{
+			return (mstate) _mem[1];
+		}
+		else return 0;
+#else
+#if USE_ALLOCATOR==0
+		/* Fail everything */
+		return 0;
+#elif USE_ALLOCATOR==1
+#ifdef ENABLE_FAST_HEAP_DETECTION
+#ifdef WIN32
+		/*  On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header
+			which looks like:
+				normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ]
+				mmaped: 4 bytes of size  4 bytes of [zero,      zero,      0xb,                        zero        ]
+
+			On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land).
+		*/
+#pragma pack(push, 1)
+		struct _HEAP_ENTRY
+		{
+			USHORT Size;
+			USHORT PreviousSize;
+			UCHAR Cookie;			/* SegmentIndex */
+			UCHAR Flags;			/* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */
+			UCHAR UnusedBytes;
+			UCHAR SmallTagIndex;	/* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */
+		} *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1;
+#pragma pack(pop)
+		unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2;
+		result1=header & mask1;	/* Positive testing for NT heap */
+		result2=header & mask2;	/* Positive testing for dlmalloc */
+		if(result1==0x00000100 && result2!=0x00000102)
+		{	/* This is likely a NT heap block */
+			return 0;
+		}
+#endif
+#ifdef __linux__
+		/* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish
+		when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr
+		down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */
+		mchunkptr p=mem2chunk(mem);
+		mstate fm=get_mstate_for(p);
+		/* If it's a ptmalloc2 block, fm is likely to be some crazy value */
+		if(!is_aligned(fm)) return 0;
+		if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
+		if(ok_magic(fm))
+			return fm;
+		else
+			return 0;
+		if(1) { }
+#endif
+		else
+		{
+			mchunkptr p=mem2chunk(mem);
+			mstate fm=get_mstate_for(p);
+			assert(ok_magic(fm));	/* If this fails, someone tried to free a block twice */
+			if(ok_magic(fm))
+				return fm;
+		}
+#else
+//#ifdef WIN32
+//		__try
+//#endif
+		{
+			/* We try to return zero here if it isn't one of our own blocks, however
+			the current block annotation scheme used by dlmalloc makes it impossible
+			to be absolutely sure of avoiding a segfault.
+
+			mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block;
+			mchunkptr->head      = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS
+			    FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently
+				            in use unless mmap), bit 2 is UNUSED and currently is always zero.
+			*/
+			register void *RESTRICT leastusedaddress_=leastusedaddress;		/* Cache these to avoid register reloading */
+			register size_t largestusedblock_=largestusedblock;
+			if(!is_aligned(mem)) return 0;		/* Would fail very rarely as all allocators return aligned blocks */
+			if(mem<leastusedaddress_) return 0;	/* Simple but effective */
+			{
+				mchunkptr p=mem2chunk(mem);
+				mstate fm=0;
+				int ismmapped=is_mmapped(p);
+				if((!ismmapped && !is_inuse(p)) || (p->head & FLAG4_BIT)) return 0;
+				/* Reduced uncertainty by 0.5^2 = 25.0% */
+				/* size should never exceed largestusedblock */
+				if(chunksize(p)>largestusedblock_) return 0;
+				/* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */
+				/* Having sanity checked prev_foot and head, check next block */
+				if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0;
+				/* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */
+	#if 0
+				/* If previous block is free, check that its next block pointer equals us */
+				if(!ismmapped && !pinuse(p))
+					if(next_chunk(prev_chunk(p))!=p) return 0;
+				/* We could start comparing prev_foot's for similarity but it starts getting slow. */
+	#endif
+				fm = get_mstate_for(p);
+				if(!is_aligned(fm) || (void *)fm<leastusedaddress_) return 0;
+				if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
+				assert(ok_magic(fm));	/* If this fails, someone tried to free a block twice */
+				if(ok_magic(fm))
+					return fm;
+			}
+		}
+//#ifdef WIN32
+//		__except(1) { }
+//#endif
+#endif
+#endif
+#endif
+	}
+	return 0;
+}
+NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC
+{
+	if(mem)
+	{
+		if(isforeign) *isforeign=1;
+#if USE_MAGIC_HEADERS
+		{
+			size_t *_mem=(size_t *) mem-3;
+			if(_mem[0]==*(size_t *) "NEDMALOC")
+			{
+				mstate mspace=(mstate) _mem[1];
+				size_t size=_mem[2];
+				if(isforeign) *isforeign=0;
+				return size;
+			}
+		}
+#elif USE_ALLOCATOR==1
+		if(nedblkmstate(mem))
+		{
+			mchunkptr p=mem2chunk(mem);
+			if(isforeign) *isforeign=0;
+			return chunksize(p)-overhead_for(p);
+		}
+#ifdef DEBUG
+		else
+		{
+			int a=1; /* Set breakpoints here if needed */
+		}
+#endif
+#endif
+#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0
+#ifdef _MSC_VER
+		/* This is the MSVCRT equivalent */
+		return _msize(mem);
+#elif defined(__linux__)
+		/* This is the glibc/ptmalloc2/dlmalloc equivalent.  */
+		return malloc_usable_size(mem);
+#elif defined(__FreeBSD__) || defined(__APPLE__)
+		/* This is the BSD libc equivalent.  */
+		return malloc_size(mem);
+#else
+#error Cannot tolerate the memory allocator of an unknown system!
+#endif
+#endif
+	}
+	return 0;
+}
+
+NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC											{ nedpsetvalue((nedpool *) 0, v); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC						{ return nedpmalloc((nedpool *) 0, size); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC			{ return nedpcalloc((nedpool *) 0, no, size); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC			{ return nedprealloc((nedpool *) 0, mem, size); }
+NEDMALLOCNOALIASATTR void   nedfree(void *mem) THROWSPEC											{ nedpfree((nedpool *) 0, mem); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC	{ return nedpmemalign((nedpool *) 0, alignment, bytes); }
+NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC									{ return nedpmallinfo((nedpool *) 0); }
+NEDMALLOCNOALIASATTR int    nedmallopt(int parno, int value) THROWSPEC								{ return nedpmallopt((nedpool *) 0, parno, value); }
+NEDMALLOCNOALIASATTR int    nedmalloc_trim(size_t pad) THROWSPEC									{ return nedpmalloc_trim((nedpool *) 0, pad); }
+void   nedmalloc_stats() THROWSPEC																	{ nedpmalloc_stats((nedpool *) 0); }
+NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC											{ return nedpmalloc_footprint((nedpool *) 0); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC	{ return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); }
+NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC		{ return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); }
+
+struct threadcacheblk_t;
+typedef struct threadcacheblk_t threadcacheblk;
+struct threadcacheblk_t
+{	/* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
+#ifdef FULLSANITYCHECKS
+	unsigned int magic;
+#endif
+	unsigned int lastUsed, size;
+	threadcacheblk *next, *prev;
+};
+typedef struct threadcache_t
+{
+#ifdef FULLSANITYCHECKS
+	unsigned int magic1;
+#endif
+	int mymspace;						/* Last mspace entry this thread used */
+	long threadid;
+	unsigned int mallocs, frees, successes;
+	size_t freeInCache;					/* How much free space is stored in this cache */
+	threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
+#ifdef FULLSANITYCHECKS
+	unsigned int magic2;
+#endif
+} threadcache;
+struct nedpool_t
+{
+	MLOCK_T mutex;
+	void *uservalue;
+	int threads;						/* Max entries in m to use */
+	threadcache *caches[THREADCACHEMAXCACHES];
+	TLSVAR mycache;						/* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
+	mstate m[MAXTHREADSINPOOL+1];		/* mspace entries for this pool */
+};
+static nedpool syspool;
+
+static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
+{	/* 8=1000	16=10000	20=10100	24=11000	32=100000	48=110000	4096=1000000000000 */
+	unsigned int topbit, size=(unsigned int)(_size>>4);
+	/* 16=1		20=1	24=1	32=10	48=11	64=100	96=110	128=1000	4096=100000000 */
+
+#if defined(__GNUC__)
+        topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+	{
+            unsigned long bsrTopBit;
+
+            _BitScanReverse(&bsrTopBit, size);
+
+            topbit = bsrTopBit;
+        }
+#else
+#if 0
+	union {
+		unsigned asInt[2];
+		double asDouble;
+	};
+	int n;
+
+	asDouble = (double)size + 0.5;
+	topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
+#else
+	{
+		unsigned int x=size;
+		x = x | (x >> 1);
+		x = x | (x >> 2);
+		x = x | (x >> 4);
+		x = x | (x >> 8);
+		x = x | (x >>16);
+		x = ~x;
+		x = x - ((x >> 1) & 0x55555555);
+		x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+		x = (x + (x >> 4)) & 0x0F0F0F0F;
+		x = x + (x << 8);
+		x = x + (x << 16);
+		topbit=31 - (x >> 24);
+	}
+#endif
+#endif
+	return topbit;
+}
+
+
+#ifdef FULLSANITYCHECKS
+static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
+{
+	assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
+	if(ptr[0] && ptr[1])
+	{
+		assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
+		assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
+		assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
+		assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
+		assert(!ptr[0]->prev);
+		assert(!ptr[1]->next);
+		if(ptr[0]==ptr[1])
+		{
+			assert(!ptr[0]->next);
+			assert(!ptr[1]->prev);
+		}
+	}
+}
+static void tcfullsanitycheck(threadcache *tc) THROWSPEC
+{
+	threadcacheblk **tcbptr=tc->bins;
+	int n;
+	for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
+	{
+		threadcacheblk *b, *ob=0;
+		tcsanitycheck(tcbptr);
+		for(b=tcbptr[0]; b; ob=b, b=b->next)
+		{
+			assert(*(unsigned int *) "NEDN"==b->magic);
+			assert(!ob || ob->next==b);
+			assert(!ob || b->prev==ob);
+		}
+	}
+}
+#endif
+
+static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
+{
+#ifdef FULLSANITYCHECKS
+	tcfullsanitycheck(tc);
+#endif
+	if(tc->freeInCache)
+	{
+		threadcacheblk **tcbptr=tc->bins;
+		int n;
+		for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
+		{
+			threadcacheblk **tcb=tcbptr+1;		/* come from oldest end of list */
+			/*tcsanitycheck(tcbptr);*/
+			for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
+			{
+				threadcacheblk *f=*tcb;
+				size_t blksize=f->size; /*nedblksize(f);*/
+				assert(blksize<=nedblksize(0, f));
+				assert(blksize);
+#ifdef FULLSANITYCHECKS
+				assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
+#endif
+				*tcb=(*tcb)->prev;
+				if(*tcb)
+					(*tcb)->next=0;
+				else
+					*tcbptr=0;
+				tc->freeInCache-=blksize;
+				assert((long) tc->freeInCache>=0);
+				CallFree(0, f, 0);
+				/*tcsanitycheck(tcbptr);*/
+			}
+		}
+	}
+#ifdef FULLSANITYCHECKS
+	tcfullsanitycheck(tc);
+#endif
+}
+static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
+{
+	if(p->caches)
+	{
+		threadcache *tc;
+		int n;
+		for(n=0; n<THREADCACHEMAXCACHES; n++)
+		{
+			if((tc=p->caches[n]))
+			{
+				tc->frees++;
+				RemoveCacheEntries(p, tc, 0);
+				assert(!tc->freeInCache);
+				tc->mymspace=-1;
+				tc->threadid=0;
+				CallFree(0, tc, 0);
+				p->caches[n]=0;
+			}
+		}
+	}
+}
+
+static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
+{
+	threadcache *tc=0;
+	int n, end;
+	ACQUIRE_LOCK(&p->mutex);
+	for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
+	if(THREADCACHEMAXCACHES==n)
+	{	/* List exhausted, so disable for this thread */
+		RELEASE_LOCK(&p->mutex);
+		return 0;
+	}
+	tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
+	if(!tc)
+	{
+		RELEASE_LOCK(&p->mutex);
+		return 0;
+	}
+#ifdef FULLSANITYCHECKS
+	tc->magic1=*(unsigned int *)"NEDMALC1";
+	tc->magic2=*(unsigned int *)"NEDMALC2";
+#endif
+	tc->threadid=(long)(size_t)CURRENT_THREAD;
+	for(end=0; p->m[end]; end++);
+	tc->mymspace=abs(tc->threadid) % end;
+	RELEASE_LOCK(&p->mutex);
+	if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
+	return tc;
+}
+
+static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
+{
+	void *RESTRICT ret=0;
+	size_t size=*_size, blksize=0;
+	unsigned int bestsize;
+	unsigned int idx=size2binidx(size);
+	threadcacheblk *RESTRICT blk, **RESTRICT binsptr;
+#ifdef FULLSANITYCHECKS
+	tcfullsanitycheck(tc);
+#endif
+	/* Calculate best fit bin size */
+	bestsize=1<<(idx+4);
+#if 0
+	/* Finer grained bin fit */
+	idx<<=1;
+	if(size>bestsize)
+	{
+		idx++;
+		bestsize+=bestsize>>1;
+	}
+	if(size>bestsize)
+	{
+		idx++;
+		bestsize=1<<(4+(idx>>1));
+	}
+#else
+	if(size>bestsize)
+	{
+		idx++;
+		bestsize<<=1;
+	}
+#endif
+	assert(bestsize>=size);
+	if(size<bestsize) size=bestsize;
+	assert(size<=THREADCACHEMAX);
+	assert(idx<=THREADCACHEMAXBINS);
+	binsptr=&tc->bins[idx*2];
+	/* Try to match close, but move up a bin if necessary */
+	blk=*binsptr;
+	if(!blk || blk->size<size)
+	{	/* Bump it up a bin */
+		if(idx<THREADCACHEMAXBINS)
+		{
+			idx++;
+			binsptr+=2;
+			blk=*binsptr;
+		}
+	}
+	if(blk)
+	{
+		blksize=blk->size; /*nedblksize(blk);*/
+		assert(nedblksize(0, blk)>=blksize);
+		assert(blksize>=size);
+		if(blk->next)
+			blk->next->prev=0;
+		*binsptr=blk->next;
+		if(!*binsptr)
+			binsptr[1]=0;
+#ifdef FULLSANITYCHECKS
+		blk->magic=0;
+#endif
+		assert(binsptr[0]!=blk && binsptr[1]!=blk);
+		assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
+		/*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/
+		ret=(void *) blk;
+	}
+	++tc->mallocs;
+	if(ret)
+	{
+		assert(blksize>=size);
+		++tc->successes;
+		tc->freeInCache-=blksize;
+		assert((long) tc->freeInCache>=0);
+	}
+#if defined(DEBUG) && 0
+	if(!(tc->mallocs & 0xfff))
+	{
+		printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
+			(float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
+	}
+#endif
+#ifdef FULLSANITYCHECKS
+	tcfullsanitycheck(tc);
+#endif
+	*_size=size;
+	return ret;
+}
+static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
+{
+	unsigned int age=THREADCACHEMAXFREESPACE/8192;
+	/*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
+	while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
+	{
+		RemoveCacheEntries(p, tc, age);
+		/*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
+		age>>=1;
+	}
+	/*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
+}
+static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
+{
+	unsigned int bestsize;
+	unsigned int idx=size2binidx(size);
+	threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem;
+	assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
+#ifdef DEBUG
+	/* Make sure this is a valid memory block */
+	assert(nedblksize(0, mem));
+#endif
+#ifdef FULLSANITYCHECKS
+	tcfullsanitycheck(tc);
+#endif
+	/* Calculate best fit bin size */
+	bestsize=1<<(idx+4);
+#if 0
+	/* Finer grained bin fit */
+	idx<<=1;
+	if(size>bestsize)
+	{
+		unsigned int biggerbestsize=bestsize+bestsize<<1;
+		if(size>=biggerbestsize)
+		{
+			idx++;
+			bestsize=biggerbestsize;
+		}
+	}
+#endif
+	if(bestsize!=size)	/* dlmalloc can round up, so we round down to preserve indexing */
+		size=bestsize;
+	binsptr=&tc->bins[idx*2];
+	assert(idx<=THREADCACHEMAXBINS);
+	if(tck==*binsptr)
+	{
+		fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck);
+		abort();
+	}
+#ifdef FULLSANITYCHECKS
+	tck->magic=*(unsigned int *) "NEDN";
+#endif
+	tck->lastUsed=++tc->frees;
+	tck->size=(unsigned int) size;
+	tck->next=*binsptr;
+	tck->prev=0;
+	if(tck->next)
+		tck->next->prev=tck;
+	else
+		binsptr[1]=tck;
+	assert(!*binsptr || (*binsptr)->size==tck->size);
+	*binsptr=tck;
+	assert(tck==tc->bins[idx*2]);
+	assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
+	/*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
+	tc->freeInCache+=size;
+#ifdef FULLSANITYCHECKS
+	tcfullsanitycheck(tc);
+#endif
+#if 1
+	if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
+		ReleaseFreeInCache(p, tc, mymspace);
+#endif
+}
+
+
+
+
+static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC
+{	/* threads is -1 for system pool */
+	ensure_initialization();
+	ACQUIRE_MALLOC_GLOBAL_LOCK();
+	if(p->threads) goto done;
+	if(INITIAL_LOCK(&p->mutex)) goto err;
+	if(TLSALLOC(&p->mycache)) goto err;
+#if USE_ALLOCATOR==0
+	p->m[0]=(mstate) mspacecounter++;
+#elif USE_ALLOCATOR==1
+	if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
+	p->m[0]->extp=p;
+#endif
+	p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
+done:
+	RELEASE_MALLOC_GLOBAL_LOCK();
+	return 1;
+err:
+	if(threads<0)
+		abort();			/* If you can't allocate for system pool, we're screwed */
+	DestroyCaches(p);
+	if(p->m[0])
+	{
+#if USE_ALLOCATOR==1
+		destroy_mspace(p->m[0]);
+#endif
+		p->m[0]=0;
+	}
+	if(p->mycache)
+	{
+		if(TLSFREE(p->mycache)) abort();
+		p->mycache=0;
+	}
+	RELEASE_MALLOC_GLOBAL_LOCK();
+	return 0;
+}
+static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC
+{	/* Gets called when thread's last used mspace is in use. The strategy
+	is to run through the list of all available mspaces looking for an
+	unlocked one and if we fail, we create a new one so long as we don't
+	exceed p->threads */
+	int n, end;
+	for(n=end=*lastUsed+1; p->m[n]; end=++n)
+	{
+		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
+	}
+	for(n=0; n<*lastUsed && p->m[n]; n++)
+	{
+		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
+	}
+	if(end<p->threads)
+	{
+		mstate temp;
+#if USE_ALLOCATOR==0
+		temp=(mstate) mspacecounter++;
+#elif USE_ALLOCATOR==1
+		if(!(temp=(mstate) create_mspace(size, 1)))
+			goto badexit;
+#endif
+		/* Now we're ready to modify the lists, we lock */
+		ACQUIRE_LOCK(&p->mutex);
+		while(p->m[end] && end<p->threads)
+			end++;
+		if(end>=p->threads)
+		{	/* Drat, must destroy it now */
+			RELEASE_LOCK(&p->mutex);
+#if USE_ALLOCATOR==1
+			destroy_mspace((mstate) temp);
+#endif
+			goto badexit;
+		}
+		/* We really want to make sure this goes into memory now but we
+		have to be careful of breaking aliasing rules, so write it twice */
+		*((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
+		ACQUIRE_LOCK(&p->m[end]->mutex);
+		/*printf("Created mspace idx %d\n", end);*/
+		RELEASE_LOCK(&p->mutex);
+		n=end;
+		goto found;
+	}
+	/* Let it lock on the last one it used */
+badexit:
+	ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
+	return p->m[*lastUsed];
+found:
+	*lastUsed=n;
+	if(tc)
+		tc->mymspace=n;
+	else
+	{
+		if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
+	}
+	return p->m[n];
+}
+
+typedef struct PoolList_t
+{
+	size_t size;			/* Size of list */
+	size_t length;			/* Actual entries in list */
+#ifdef DEBUG
+	nedpool *list[1];		/* Force testing of list expansion */
+#else
+	nedpool *list[16];
+#endif
+} PoolList;
+static MLOCK_T poollistlock;
+static PoolList *poollist;
+NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
+{
+	nedpool *ret=0;
+	if(!poollist)
+	{
+		PoolList *newpoollist=0;
+		if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
+		INITIAL_LOCK(&poollistlock);
+		ACQUIRE_LOCK(&poollistlock);
+		poollist=newpoollist;
+		poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
+	}
+	else
+		ACQUIRE_LOCK(&poollistlock);
+	if(poollist->length==poollist->size)
+	{
+		PoolList *newpoollist=0;
+		size_t newsize=0;
+		newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
+		if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
+		poollist=newpoollist;
+		memset(&poollist->list[poollist->size], 0, newsize-((size_t)&poollist->list[poollist->size]-(size_t)&poollist->list[0]));
+		poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
+		assert(poollist->size>poollist->length);
+	}
+	if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
+	if(!InitPool(ret, capacity, threads))
+	{
+		nedpfree(0, ret);
+		goto badexit;
+	}
+	poollist->list[poollist->length++]=ret;
+badexit:
+	RELEASE_LOCK(&poollistlock);
+	return ret;
+}
+void neddestroypool(nedpool *p) THROWSPEC
+{
+	unsigned int n;
+	ACQUIRE_LOCK(&p->mutex);
+	DestroyCaches(p);
+	for(n=0; p->m[n]; n++)
+	{
+#if USE_ALLOCATOR==1
+		destroy_mspace(p->m[n]);
+#endif
+		p->m[n]=0;
+	}
+	RELEASE_LOCK(&p->mutex);
+	if(TLSFREE(p->mycache)) abort();
+	nedpfree(0, p);
+	ACQUIRE_LOCK(&poollistlock);
+	assert(poollist);
+	for(n=0; n<poollist->length && poollist->list[n]!=p; n++);
+	assert(n!=poollist->length);
+	memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]);
+	if(!--poollist->length)
+	{
+		assert(!poollist->list[0]);
+		nedpfree(0, poollist);
+		poollist=0;
+	}
+	RELEASE_LOCK(&poollistlock);
+}
+void neddestroysyspool() THROWSPEC
+{
+	nedpool *p=&syspool;
+	int n;
+	ACQUIRE_LOCK(&p->mutex);
+	DestroyCaches(p);
+	for(n=0; p->m[n]; n++)
+	{
+#if USE_ALLOCATOR==1
+		destroy_mspace(p->m[n]);
+#endif
+		p->m[n]=0;
+	}
+	/* Render syspool unusable */
+	for(n=0; n<THREADCACHEMAXCACHES; n++)
+		p->caches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
+	for(n=0; n<MAXTHREADSINPOOL+1; n++)
+		p->m[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
+	if(TLSFREE(p->mycache)) abort();
+	RELEASE_LOCK(&p->mutex);
+}
+nedpool **nedpoollist() THROWSPEC
+{
+	nedpool **ret=0;
+	if(poollist)
+	{
+		ACQUIRE_LOCK(&poollistlock);
+		if(!(ret=(nedpool **) nedmalloc((poollist->length+1)*sizeof(nedpool *)))) goto badexit;
+		memcpy(ret, poollist->list, (poollist->length+1)*sizeof(nedpool *));
+badexit:
+		RELEASE_LOCK(&poollistlock);
+	}
+	return ret;
+}
+
+void nedpsetvalue(nedpool *p, void *v) THROWSPEC
+{
+	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+	p->uservalue=v;
+}
+void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
+{
+	nedpool *np=0;
+	mstate fm=nedblkmstate(mem);
+	if(!fm || !fm->extp) return 0;
+	np=(nedpool *) fm->extp;
+	if(p) *p=np;
+	return np->uservalue;
+}
+
+void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
+{
+	int mycache;
+	if(!p)
+	{
+		p=&syspool;
+		if(!syspool.threads) InitPool(&syspool, 0, -1);
+	}
+	mycache=(int)(size_t) TLSGET(p->mycache);
+	if(!mycache)
+	{	/* Set to mspace 0 */
+		if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
+	}
+	else if(mycache>0)
+	{	/* Set to last used mspace */
+		threadcache *tc=p->caches[mycache-1];
+#if defined(DEBUG)
+		printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
+			100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
+#endif
+		if(disable && TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
+		tc->frees++;
+		RemoveCacheEntries(p, tc, 0);
+		assert(!tc->freeInCache);
+		if(disable)
+		{
+			tc->mymspace=-1;
+			tc->threadid=0;
+			CallFree(0, p->caches[mycache-1], 0);
+			p->caches[mycache-1]=0;
+		}
+	}
+}
+void neddisablethreadcache(nedpool *p) THROWSPEC
+{
+	nedtrimthreadcache(p, 1);
+}
+
+#define GETMSPACE(m,p,tc,ms,s,action)                 \
+  do                                                  \
+  {                                                   \
+    mstate m = GetMSpace((p),(tc),(ms),(s));          \
+    action;                                           \
+	if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \
+  } while (0)
+
+static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
+{	/* Returns a locked and ready for use mspace */
+	mstate m=p->m[mymspace];
+	assert(m);
+#if USE_ALLOCATOR==1
+	if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
+	/*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
+#endif
+	return m;
+}
+static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
+{
+	*p=&syspool;
+	if(!syspool.threads) InitPool(&syspool, 0, -1);
+}
+static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
+{
+	if(!mycache)
+	{	/* Need to allocate a new cache */
+		*tc=AllocCache(*p);
+		if(!*tc)
+		{	/* Disable */
+			if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
+			*mymspace=0;
+		}
+		else
+			*mymspace=(*tc)->mymspace;
+	}
+	else
+	{	/* Cache disabled, but we do have an assigned thread pool */
+		*tc=0;
+		*mymspace=-mycache-1;
+	}
+}
+static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
+{
+	int mycache;
+	if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
+	if(!*p)
+		GetThreadCache_cold1(p);
+	mycache=(int)(size_t) TLSGET((*p)->mycache);
+	if(mycache>0)
+	{	/* Already have a cache */
+		*tc=(*p)->caches[mycache-1];
+		*mymspace=(*tc)->mymspace;
+	}
+	else GetThreadCache_cold2(p, tc, mymspace, mycache);
+	assert(*mymspace>=0);
+	assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
+#ifdef FULLSANITYCHECKS
+	if(*tc)
+	{
+		if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
+		{
+			abort();
+		}
+	}
+#endif
+}
+
+NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
+{
+	void *ret=0;
+	threadcache *tc;
+	int mymspace;
+	GetThreadCache(&p, &tc, &mymspace, &size);
+#if THREADCACHEMAX
+	if(tc && size<=THREADCACHEMAX)
+	{	/* Use the thread cache */
+		ret=threadcache_malloc(p, tc, &size);
+	}
+#endif
+	if(!ret)
+	{	/* Use this thread's mspace */
+        GETMSPACE(m, p, tc, mymspace, size,
+                  ret=CallMalloc(m, size, 0));
+	}
+	return ret;
+}
+NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
+{
+	size_t rsize=size*no;
+	void *ret=0;
+	threadcache *tc;
+	int mymspace;
+	GetThreadCache(&p, &tc, &mymspace, &rsize);
+#if THREADCACHEMAX
+	if(tc && rsize<=THREADCACHEMAX)
+	{	/* Use the thread cache */
+		if((ret=threadcache_malloc(p, tc, &rsize)))
+			memset(ret, 0, rsize);
+	}
+#endif
+	if(!ret)
+	{	/* Use this thread's mspace */
+        GETMSPACE(m, p, tc, mymspace, rsize,
+                  ret=CallCalloc(m, rsize, 0));
+	}
+	return ret;
+}
+NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
+{
+	void *ret=0;
+	threadcache *tc;
+	int mymspace, isforeign=1;
+	size_t memsize;
+	if(!mem) return nedpmalloc(p, size);
+	memsize=nedblksize(&isforeign, mem);
+	assert(memsize);
+	if(!memsize)
+	{
+		fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
+		abort();
+	}
+	else if(size<=memsize && memsize-size<
+#ifdef DEBUG
+		32
+#else
+		1024
+#endif
+		)		/* If realloc size is within 1Kb smaller than existing, noop it */
+		return mem;
+	GetThreadCache(&p, &tc, &mymspace, &size);
+#if THREADCACHEMAX
+	if(tc && size && size<=THREADCACHEMAX)
+	{	/* Use the thread cache */
+		if((ret=threadcache_malloc(p, tc, &size)))
+		{
+			memcpy(ret, mem, memsize<size ? memsize : size);
+			if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
+				threadcache_free(p, tc, mymspace, mem, memsize);
+			else
+				CallFree(0, mem, isforeign);
+		}
+	}
+#endif
+	if(!ret)
+	{	/* Reallocs always happen in the mspace they happened in, so skip
+		locking the preferred mspace for this thread */
+		ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
+	}
+	return ret;
+}
+void   nedpfree(nedpool *p, void *mem) THROWSPEC
+{	/* Frees always happen in the mspace they happened in, so skip
+	locking the preferred mspace for this thread */
+	threadcache *tc;
+	int mymspace, isforeign=1;
+	size_t memsize;
+	if(!mem)
+	{	/* If you tried this on FreeBSD you'd be sorry! */
+#ifdef DEBUG
+		fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
+#endif
+		return;
+	}
+	memsize=nedblksize(&isforeign, mem);
+	assert(memsize);
+	if(!memsize)
+	{
+		fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
+		abort();
+	}
+	GetThreadCache(&p, &tc, &mymspace, 0);
+#if THREADCACHEMAX
+	if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
+		threadcache_free(p, tc, mymspace, mem, memsize);
+	else
+#endif
+		CallFree(0, mem, isforeign);
+}
+NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
+{
+	void *ret;
+	threadcache *tc;
+	int mymspace;
+	GetThreadCache(&p, &tc, &mymspace, &bytes);
+	{	/* Use this thread's mspace */
+        GETMSPACE(m, p, tc, mymspace, bytes,
+                  ret=CallMalloc(m, bytes, alignment));
+	}
+	return ret;
+}
+struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
+{
+	int n;
+	struct nedmallinfo ret={0};
+	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+	for(n=0; p->m[n]; n++)
+	{
+#if USE_ALLOCATOR==1 && !NO_MALLINFO
+		struct mallinfo t=mspace_mallinfo(p->m[n]);
+		ret.arena+=t.arena;
+		ret.ordblks+=t.ordblks;
+		ret.hblkhd+=t.hblkhd;
+		ret.usmblks+=t.usmblks;
+		ret.uordblks+=t.uordblks;
+		ret.fordblks+=t.fordblks;
+		ret.keepcost+=t.keepcost;
+#endif
+	}
+	return ret;
+}
+int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
+{
+#if USE_ALLOCATOR==1
+	return mspace_mallopt(parno, value);
+#else
+	return 0;
+#endif
+}
+NEDMALLOCNOALIASATTR void*  nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
+{
+#if USE_ALLOCATOR==1
+	if(granularity) *granularity=mparams.granularity;
+	if(magic) *magic=mparams.magic;
+	return (void *) &syspool;
+#else
+	if(granularity) *granularity=0;
+	if(magic) *magic=0;
+	return 0;
+#endif
+}
+int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
+{
+	int n, ret=0;
+	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+	for(n=0; p->m[n]; n++)
+	{
+#if USE_ALLOCATOR==1
+		ret+=mspace_trim(p->m[n], pad);
+#endif
+	}
+	return ret;
+}
+void   nedpmalloc_stats(nedpool *p) THROWSPEC
+{
+	int n;
+	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+	for(n=0; p->m[n]; n++)
+	{
+#if USE_ALLOCATOR==1
+		mspace_malloc_stats(p->m[n]);
+#endif
+	}
+}
+size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
+{
+	size_t ret=0;
+	int n;
+	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
+	for(n=0; p->m[n]; n++)
+	{
+#if USE_ALLOCATOR==1
+		ret+=mspace_footprint(p->m[n]);
+#endif
+	}
+	return ret;
+}
+NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
+{
+	void **ret;
+	threadcache *tc;
+	int mymspace;
+	GetThreadCache(&p, &tc, &mymspace, &elemsize);
+#if USE_ALLOCATOR==0
+    GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
+              ret=unsupported_operation("independent_calloc"));
+#elif USE_ALLOCATOR==1
+    GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
+              ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
+#endif
+	return ret;
+}
+NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
+{
+	void **ret;
+	threadcache *tc;
+	int mymspace;
+    size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
+    if(!adjustedsizes) return 0;
+    for(i=0; i<elems; i++)
+        adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
+	GetThreadCache(&p, &tc, &mymspace, 0);
+#if USE_ALLOCATOR==0
+	GETMSPACE(m, p, tc, mymspace, 0,
+              ret=unsupported_operation("independent_comalloc"));
+#elif USE_ALLOCATOR==1
+	GETMSPACE(m, p, tc, mymspace, 0,
+              ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
+#endif
+	return ret;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif
diff --git a/drivers/nedmalloc/nedmalloc.h b/drivers/nedmalloc/nedmalloc.h
index b9add1683a..7ec65849fc 100644
--- a/drivers/nedmalloc/nedmalloc.h
+++ b/drivers/nedmalloc/nedmalloc.h
@@ -1,302 +1,302 @@
-#ifdef NEDMALLOC_ENABLED
-
-/* nedalloc, an alternative malloc implementation for multiple threads without
-lock contention based on dlmalloc v2.8.3. (C) 2005-2009 Niall Douglas
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef NEDMALLOC_H
-#define NEDMALLOC_H
-
-#include "typedefs.h"
-#define MALLOC_ALIGNMENT DEFAULT_ALIGNMENT
-
-#ifdef PSP_ENABLED
-#define USE_LOCKS 0
-#define HAVE_MMAP 0
-#endif
-
-/* See malloc.c.h for what each function does.
-
-REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called
-malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want
-this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries
-to replace usage of the system allocator.
-
-NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc
-namespace when in C++ (uses the global namespace instead).
-
-NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or
-__attribute__ ((visibility("default"))) or whatever you like. It defaults
-to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building
-nedmalloc.dll.
-
-USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK,
-ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER.
-
-NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc
-than for the rest of the build. Remember to set NDEBUG to disable all assertion
-checking too.
-
-USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t)
-to each block. nedpfree() and nedprealloc() can then automagically know when
-to free a system allocated block. Enabling this typically adds 20-50% to
-application memory usage.
-
-ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR
-is set or the Windows DLL is being built. This causes nedmalloc to detect when a
-system allocator block is passed to it and to handle it appropriately. Note that
-without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault
-on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there
-is no comparable system on POSIX).
-
-USE_ALLOCATOR can be one of these settings (it defaults to 1):
-  0: System allocator (nedmalloc now simply acts as a threadcache).
-     WARNING: Intended for DEBUG USE ONLY - not all functions work correctly.
-  1: dlmalloc
-
-ENABLE_LARGE_PAGES enables support for requesting memory from the system in large
-(typically >=2Mb) pages if the host OS supports this. These occupy just a single
-TLB entry and can significantly improve performance in large working set applications.
-
-ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated
-by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc
-blocks, but it assumes that the NT and glibc heaps function in a very specific
-fashion which may not hold true across OS upgrades.
-*/
-
-#include <stddef.h>   /* for size_t */
-
-#ifndef NEDMALLOCEXTSPEC
- #ifdef NEDMALLOC_DLL_EXPORTS
-  #ifdef WIN32
-   #define NEDMALLOCEXTSPEC extern __declspec(dllexport)
-  #elif defined(__GNUC__)
-   #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default")))
-  #endif
-  #ifndef ENABLE_TOLERANT_NEDMALLOC
-   #define ENABLE_TOLERANT_NEDMALLOC 1
-  #endif
- #else
-  #define NEDMALLOCEXTSPEC extern
- #endif
-#endif
-
-#if __STDC_VERSION__ >= 199901L		/* C99 or better */
- #define RESTRICT restrict
-#else
- #if defined(_MSC_VER) && _MSC_VER>=1400
-  #define RESTRICT __restrict
- #endif
- #ifdef __GNUC__
-  #define RESTRICT __restrict
- #endif
-#endif
-#ifndef RESTRICT
- #define RESTRICT
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER>=1400
- #define NEDMALLOCPTRATTR __declspec(restrict)
- #define NEDMALLOCNOALIASATTR __declspec(noalias)
-#endif
-#ifdef __GNUC__
- #define NEDMALLOCPTRATTR __attribute__ ((malloc))
-#endif
-#ifndef NEDMALLOCPTRATTR
- #define NEDMALLOCPTRATTR
-#endif
-#ifndef NEDMALLOCNOALIASATTR
- #define NEDMALLOCNOALIASATTR
-#endif
-
-#ifndef USE_MAGIC_HEADERS
- #define USE_MAGIC_HEADERS 0
-#endif
-
-#ifndef USE_ALLOCATOR
- #define USE_ALLOCATOR 1 /* dlmalloc */
-#endif
-
-#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS
-#error If you are using the system allocator then you MUST use magic headers
-#endif
-
-#ifdef REPLACE_SYSTEM_ALLOCATOR
- #if USE_ALLOCATOR==0
-  #error Cannot combine using the system allocator with replacing the system allocator
- #endif
- #ifndef ENABLE_TOLERANT_NEDMALLOC
-  #define ENABLE_TOLERANT_NEDMALLOC 1
- #endif
- #ifndef WIN32	/* We have a dedicated patcher for Windows */
-  #define nedmalloc               malloc
-  #define nedcalloc               calloc
-  #define nedrealloc              realloc
-  #define nedfree                 free
-  #define nedmemalign             memalign
-  #define nedmallinfo             mallinfo
-  #define nedmallopt              mallopt
-  #define nedmalloc_trim          malloc_trim
-  #define nedmalloc_stats         malloc_stats
-  #define nedmalloc_footprint     malloc_footprint
-  #define nedindependent_calloc   independent_calloc
-  #define nedindependent_comalloc independent_comalloc
-  #ifdef _MSC_VER
-   #define nedblksize              _msize
-  #endif
- #endif
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-struct nedmallinfo {
-  size_t arena;    /* non-mmapped space allocated from system */
-  size_t ordblks;  /* number of free chunks */
-  size_t smblks;   /* always 0 */
-  size_t hblks;    /* always 0 */
-  size_t hblkhd;   /* space in mmapped regions */
-  size_t usmblks;  /* maximum total allocated space */
-  size_t fsmblks;  /* always 0 */
-  size_t uordblks; /* total allocated space */
-  size_t fordblks; /* total free space */
-  size_t keepcost; /* releasable (via malloc_trim) space */
-};
-#if defined(__cplusplus)
-}
-#endif
-
-#if defined(__cplusplus)
- #if !defined(NO_NED_NAMESPACE)
-namespace nedalloc {
- #else
-extern "C" {
- #endif
- #define THROWSPEC throw()
-#else
- #define THROWSPEC
-#endif
-
-/* These are the global functions */
-
-/* Gets the usable size of an allocated block. Note this will always be bigger than what was
-asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the
-system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows
-systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS.
-*/
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC;
-
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC;
-
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void   nedfree(void *mem) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int    nedmallopt(int parno, int value) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void*  nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int    nedmalloc_trim(size_t pad) THROWSPEC;
-NEDMALLOCEXTSPEC void   nedmalloc_stats(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC;
-
-/* Destroys the system memory pool used by the functions above.
-Useful for when you have nedmalloc in a DLL you're about to unload.
-If you call ANY nedmalloc functions after calling this you will
-get a fatal exception!
-*/
-NEDMALLOCEXTSPEC void neddestroysyspool() THROWSPEC;
-
-/* These are the pool functions */
-struct nedpool_t;
-typedef struct nedpool_t nedpool;
-
-/* Creates a memory pool for use with the nedp* functions below.
-Capacity is how much to allocate immediately (if you know you'll be allocating a lot
-of memory very soon) which you can leave at zero. Threads specifies how many threads
-will *normally* be accessing the pool concurrently. Setting this to zero means it
-extends on demand, but be careful of this as it can rapidly consume system resources
-where bursts of concurrent threads use a pool at once.
-*/
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC;
-
-/* Destroys a memory pool previously created by nedcreatepool().
-*/
-NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC;
-
-/* Returns a zero terminated snapshot of threadpools existing at the time of call. Call
-nedfree() on the returned list when you are done. Returns zero if there is only the
-system pool in existence.
-*/
-NEDMALLOCEXTSPEC nedpool **nedpoollist() THROWSPEC;
-
-/* Sets a value to be associated with a pool. You can retrieve this value by passing
-any memory block allocated from that pool.
-*/
-NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC;
-
-/* Gets a previously set value using nedpsetvalue() or zero if memory is unknown.
-Optionally can also retrieve pool. You can detect an unknown block by the return
-being zero and *p being unmodifed.
-*/
-NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC;
-
-/* Trims the thread cache for the calling thread, returning any existing cache
-data to the central pool. Remember to ALWAYS call with zero if you used the
-system pool. Setting disable to non-zero replicates neddisablethreadcache().
-*/
-NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC;
-
-/* Disables the thread cache for the calling thread, returning any existing cache
-data to the central pool. Remember to ALWAYS call with zero if you used the
-system pool.
-*/
-NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC;
-
-
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC void   nedpfree(nedpool *p, void *mem) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC;
-NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC;
-NEDMALLOCEXTSPEC int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC;
-NEDMALLOCEXTSPEC void   nedpmalloc_stats(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC;
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
-
-#endif
+#ifdef NEDMALLOC_ENABLED
+
+/* nedalloc, an alternative malloc implementation for multiple threads without
+lock contention based on dlmalloc v2.8.3. (C) 2005-2009 Niall Douglas
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef NEDMALLOC_H
+#define NEDMALLOC_H
+
+#include "typedefs.h"
+#define MALLOC_ALIGNMENT DEFAULT_ALIGNMENT
+
+#ifdef PSP_ENABLED
+#define USE_LOCKS 0
+#define HAVE_MMAP 0
+#endif
+
+/* See malloc.c.h for what each function does.
+
+REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called
+malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want
+this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries
+to replace usage of the system allocator.
+
+NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc
+namespace when in C++ (uses the global namespace instead).
+
+NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or
+__attribute__ ((visibility("default"))) or whatever you like. It defaults
+to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building
+nedmalloc.dll.
+
+USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK,
+ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER.
+
+NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc
+than for the rest of the build. Remember to set NDEBUG to disable all assertion
+checking too.
+
+USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t)
+to each block. nedpfree() and nedprealloc() can then automagically know when
+to free a system allocated block. Enabling this typically adds 20-50% to
+application memory usage.
+
+ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR
+is set or the Windows DLL is being built. This causes nedmalloc to detect when a
+system allocator block is passed to it and to handle it appropriately. Note that
+without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault
+on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there
+is no comparable system on POSIX).
+
+USE_ALLOCATOR can be one of these settings (it defaults to 1):
+  0: System allocator (nedmalloc now simply acts as a threadcache).
+     WARNING: Intended for DEBUG USE ONLY - not all functions work correctly.
+  1: dlmalloc
+
+ENABLE_LARGE_PAGES enables support for requesting memory from the system in large
+(typically >=2Mb) pages if the host OS supports this. These occupy just a single
+TLB entry and can significantly improve performance in large working set applications.
+
+ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated
+by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc
+blocks, but it assumes that the NT and glibc heaps function in a very specific
+fashion which may not hold true across OS upgrades.
+*/
+
+#include <stddef.h>   /* for size_t */
+
+#ifndef NEDMALLOCEXTSPEC
+ #ifdef NEDMALLOC_DLL_EXPORTS
+  #ifdef WIN32
+   #define NEDMALLOCEXTSPEC extern __declspec(dllexport)
+  #elif defined(__GNUC__)
+   #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default")))
+  #endif
+  #ifndef ENABLE_TOLERANT_NEDMALLOC
+   #define ENABLE_TOLERANT_NEDMALLOC 1
+  #endif
+ #else
+  #define NEDMALLOCEXTSPEC extern
+ #endif
+#endif
+
+#if __STDC_VERSION__ >= 199901L		/* C99 or better */
+ #define RESTRICT restrict
+#else
+ #if defined(_MSC_VER) && _MSC_VER>=1400
+  #define RESTRICT __restrict
+ #endif
+ #ifdef __GNUC__
+  #define RESTRICT __restrict
+ #endif
+#endif
+#ifndef RESTRICT
+ #define RESTRICT
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER>=1400
+ #define NEDMALLOCPTRATTR __declspec(restrict)
+ #define NEDMALLOCNOALIASATTR __declspec(noalias)
+#endif
+#ifdef __GNUC__
+ #define NEDMALLOCPTRATTR __attribute__ ((malloc))
+#endif
+#ifndef NEDMALLOCPTRATTR
+ #define NEDMALLOCPTRATTR
+#endif
+#ifndef NEDMALLOCNOALIASATTR
+ #define NEDMALLOCNOALIASATTR
+#endif
+
+#ifndef USE_MAGIC_HEADERS
+ #define USE_MAGIC_HEADERS 0
+#endif
+
+#ifndef USE_ALLOCATOR
+ #define USE_ALLOCATOR 1 /* dlmalloc */
+#endif
+
+#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS
+#error If you are using the system allocator then you MUST use magic headers
+#endif
+
+#ifdef REPLACE_SYSTEM_ALLOCATOR
+ #if USE_ALLOCATOR==0
+  #error Cannot combine using the system allocator with replacing the system allocator
+ #endif
+ #ifndef ENABLE_TOLERANT_NEDMALLOC
+  #define ENABLE_TOLERANT_NEDMALLOC 1
+ #endif
+ #ifndef WIN32	/* We have a dedicated patcher for Windows */
+  #define nedmalloc               malloc
+  #define nedcalloc               calloc
+  #define nedrealloc              realloc
+  #define nedfree                 free
+  #define nedmemalign             memalign
+  #define nedmallinfo             mallinfo
+  #define nedmallopt              mallopt
+  #define nedmalloc_trim          malloc_trim
+  #define nedmalloc_stats         malloc_stats
+  #define nedmalloc_footprint     malloc_footprint
+  #define nedindependent_calloc   independent_calloc
+  #define nedindependent_comalloc independent_comalloc
+  #ifdef _MSC_VER
+   #define nedblksize              _msize
+  #endif
+ #endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+struct nedmallinfo {
+  size_t arena;    /* non-mmapped space allocated from system */
+  size_t ordblks;  /* number of free chunks */
+  size_t smblks;   /* always 0 */
+  size_t hblks;    /* always 0 */
+  size_t hblkhd;   /* space in mmapped regions */
+  size_t usmblks;  /* maximum total allocated space */
+  size_t fsmblks;  /* always 0 */
+  size_t uordblks; /* total allocated space */
+  size_t fordblks; /* total free space */
+  size_t keepcost; /* releasable (via malloc_trim) space */
+};
+#if defined(__cplusplus)
+}
+#endif
+
+#if defined(__cplusplus)
+ #if !defined(NO_NED_NAMESPACE)
+namespace nedalloc {
+ #else
+extern "C" {
+ #endif
+ #define THROWSPEC throw()
+#else
+ #define THROWSPEC
+#endif
+
+/* These are the global functions */
+
+/* Gets the usable size of an allocated block. Note this will always be bigger than what was
+asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the
+system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows
+systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS.
+*/
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC;
+
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC;
+
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void   nedfree(void *mem) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int    nedmallopt(int parno, int value) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void*  nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int    nedmalloc_trim(size_t pad) THROWSPEC;
+NEDMALLOCEXTSPEC void   nedmalloc_stats(void) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC;
+
+/* Destroys the system memory pool used by the functions above.
+Useful for when you have nedmalloc in a DLL you're about to unload.
+If you call ANY nedmalloc functions after calling this you will
+get a fatal exception!
+*/
+NEDMALLOCEXTSPEC void neddestroysyspool() THROWSPEC;
+
+/* These are the pool functions */
+struct nedpool_t;
+typedef struct nedpool_t nedpool;
+
+/* Creates a memory pool for use with the nedp* functions below.
+Capacity is how much to allocate immediately (if you know you'll be allocating a lot
+of memory very soon) which you can leave at zero. Threads specifies how many threads
+will *normally* be accessing the pool concurrently. Setting this to zero means it
+extends on demand, but be careful of this as it can rapidly consume system resources
+where bursts of concurrent threads use a pool at once.
+*/
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC;
+
+/* Destroys a memory pool previously created by nedcreatepool().
+*/
+NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC;
+
+/* Returns a zero terminated snapshot of threadpools existing at the time of call. Call
+nedfree() on the returned list when you are done. Returns zero if there is only the
+system pool in existence.
+*/
+NEDMALLOCEXTSPEC nedpool **nedpoollist() THROWSPEC;
+
+/* Sets a value to be associated with a pool. You can retrieve this value by passing
+any memory block allocated from that pool.
+*/
+NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC;
+
+/* Gets a previously set value using nedpsetvalue() or zero if memory is unknown.
+Optionally can also retrieve pool. You can detect an unknown block by the return
+being zero and *p being unmodifed.
+*/
+NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC;
+
+/* Trims the thread cache for the calling thread, returning any existing cache
+data to the central pool. Remember to ALWAYS call with zero if you used the
+system pool. Setting disable to non-zero replicates neddisablethreadcache().
+*/
+NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC;
+
+/* Disables the thread cache for the calling thread, returning any existing cache
+data to the central pool. Remember to ALWAYS call with zero if you used the
+system pool.
+*/
+NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC;
+
+
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC;
+NEDMALLOCEXTSPEC void   nedpfree(nedpool *p, void *mem) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC;
+NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC;
+NEDMALLOCEXTSPEC int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC;
+NEDMALLOCEXTSPEC int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC;
+NEDMALLOCEXTSPEC void   nedpmalloc_stats(nedpool *p) THROWSPEC;
+NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
+NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+
+#endif
diff --git a/drivers/openssl/register_openssl.cpp b/drivers/openssl/register_openssl.cpp
index a4a60813b6..ed2150bef5 100644
--- a/drivers/openssl/register_openssl.cpp
+++ b/drivers/openssl/register_openssl.cpp
@@ -1,19 +1,19 @@
-#include "register_openssl.h"
-
-#include "stream_peer_openssl.h"
-#ifdef OPENSSL_ENABLED
-
-void register_openssl() {
-
-	ObjectTypeDB::register_type<StreamPeerOpenSSL>();
-	StreamPeerOpenSSL::initialize_ssl();
-
-}
-
-void unregister_openssl() {
-
-	StreamPeerOpenSSL::finalize_ssl();
-
-}
-#endif
-
+#include "register_openssl.h"
+
+#include "stream_peer_openssl.h"
+#ifdef OPENSSL_ENABLED
+
+void register_openssl() {
+
+	ObjectTypeDB::register_type<StreamPeerOpenSSL>();
+	StreamPeerOpenSSL::initialize_ssl();
+
+}
+
+void unregister_openssl() {
+
+	StreamPeerOpenSSL::finalize_ssl();
+
+}
+#endif
+
diff --git a/drivers/openssl/register_openssl.h b/drivers/openssl/register_openssl.h
index e1c554ca4a..e547a2b750 100644
--- a/drivers/openssl/register_openssl.h
+++ b/drivers/openssl/register_openssl.h
@@ -1,11 +1,11 @@
-#ifndef REGISTER_OPENSSL_H
-#define REGISTER_OPENSSL_H
-
-#ifdef OPENSSL_ENABLED
-
-void register_openssl();
-void unregister_openssl();
-
-#endif
-
-#endif // REGISTER_OPENSSL_H
+#ifndef REGISTER_OPENSSL_H
+#define REGISTER_OPENSSL_H
+
+#ifdef OPENSSL_ENABLED
+
+void register_openssl();
+void unregister_openssl();
+
+#endif
+
+#endif // REGISTER_OPENSSL_H
diff --git a/drivers/opus/SCsub b/drivers/opus/SCsub
new file mode 100644
index 0000000000..a2bebf62b3
--- /dev/null
+++ b/drivers/opus/SCsub
@@ -0,0 +1,200 @@
+Import('env')
+
+opus_sources = [
+	"opus/audio_stream_opus.cpp",
+]
+
+opus_sources_silk=[]
+
+opus_sources_lib = [
+	"opus/celt/bands.c",
+	"opus/celt/celt_lpc.c",
+	"opus/celt/entenc.c",
+	"opus/celt/mdct.c",
+	"opus/celt/quant_bands.c",
+	"opus/celt/celt.c",
+	"opus/celt/cwrs.c",
+	"opus/celt/kiss_fft.c",
+	"opus/celt/modes.c",
+	"opus/celt/rate.c",
+	"opus/celt/celt_decoder.c",
+	"opus/celt/entcode.c",
+	"opus/celt/laplace.c",
+	#opus/celt/opus_custom_demo.c",
+	"opus/celt/vq.c",
+	"opus/celt/celt_encoder.c",
+	"opus/celt/entdec.c",
+	"opus/celt/mathops.c",
+	"opus/celt/pitch.c",
+	"opus/silk/A2NLSF.c",
+	"opus/silk/decoder_set_fs.c",
+	"opus/silk/NLSF_stabilize.c",
+	"opus/silk/sigm_Q15.c",
+	"opus/silk/ana_filt_bank_1.c",
+	"opus/silk/enc_API.c",
+	"opus/silk/NLSF_unpack.c",
+	"opus/silk/sort.c",
+	"opus/silk/biquad_alt.c",
+	"opus/silk/encode_indices.c",
+	"opus/silk/NLSF_VQ.c",
+	"opus/silk/stereo_decode_pred.c",
+	"opus/silk/bwexpander_32.c",
+	"opus/silk/encode_pulses.c",
+	"opus/silk/NLSF_VQ_weights_laroia.c",
+	"opus/silk/stereo_encode_pred.c",
+	"opus/silk/bwexpander.c",
+	"opus/silk/gain_quant.c",
+	"opus/silk/NSQ.c",
+	"opus/silk/stereo_find_predictor.c",
+	"opus/silk/check_control_input.c",
+	"opus/silk/HP_variable_cutoff.c",
+	"opus/silk/NSQ_del_dec.c",
+	"opus/silk/stereo_LR_to_MS.c",
+	"opus/silk/CNG.c",
+	"opus/silk/init_decoder.c",
+	"opus/silk/pitch_est_tables.c",
+	"opus/silk/stereo_MS_to_LR.c",
+	"opus/silk/code_signs.c",
+	"opus/silk/init_encoder.c",
+	"opus/silk/PLC.c",
+	"opus/silk/stereo_quant_pred.c",
+	"opus/silk/control_audio_bandwidth.c",
+	"opus/silk/inner_prod_aligned.c",
+	"opus/silk/process_NLSFs.c",
+	"opus/silk/sum_sqr_shift.c",
+	"opus/silk/control_codec.c",
+	"opus/silk/interpolate.c",
+	"opus/silk/quant_LTP_gains.c",
+	"opus/silk/table_LSF_cos.c",
+	"opus/silk/control_SNR.c",
+	"opus/silk/lin2log.c",
+	"opus/silk/resampler.c",
+	"opus/silk/tables_gain.c",
+	"opus/silk/debug.c",
+	"opus/silk/log2lin.c",
+	"opus/silk/resampler_down2_3.c",
+	"opus/silk/tables_LTP.c",
+	"opus/silk/dec_API.c",
+	"opus/silk/LPC_analysis_filter.c",
+	"opus/silk/resampler_down2.c",
+	"opus/silk/tables_NLSF_CB_NB_MB.c",
+	"opus/silk/decode_core.c",
+	"opus/silk/LPC_inv_pred_gain.c",
+	"opus/silk/resampler_private_AR2.c",
+	"opus/silk/tables_NLSF_CB_WB.c",
+	"opus/silk/decode_frame.c",
+	"opus/silk/LP_variable_cutoff.c",
+	"opus/silk/resampler_private_down_FIR.c",
+	"opus/silk/tables_other.c",
+	"opus/silk/decode_indices.c",
+	"opus/silk/NLSF2A.c",
+	"opus/silk/resampler_private_IIR_FIR.c",
+	"opus/silk/tables_pitch_lag.c",
+	"opus/silk/decode_parameters.c",
+	"opus/silk/NLSF_decode.c",
+	"opus/silk/resampler_private_up2_HQ.c",
+	"opus/silk/tables_pulses_per_block.c",
+	"opus/silk/decode_pitch.c",
+	"opus/silk/NLSF_del_dec_quant.c",
+	"opus/silk/resampler_rom.c",
+	"opus/silk/VAD.c",
+	"opus/silk/decode_pulses.c",
+	"opus/silk/NLSF_encode.c",
+	"opus/silk/shell_coder.c",
+	"opus/silk/VQ_WMat_EC.c",
+	"opus/analysis.c",
+	"opus/internal.c",
+	"opus/opus.c",
+	#"opus/opus_demo.c",
+	"opus/opus_multistream.c",
+	"opus/repacketizer.c",
+	"opus/wincerts.c",
+	"opus/http.c",
+	"opus/mlp.c",
+	#"opus/opus_compare.c",
+	"opus/opus_encoder.c",
+	"opus/opus_multistream_decoder.c",
+	#"opus/repacketizer_demo.c",
+	"opus/info.c",
+	"opus/mlp_data.c",
+	"opus/opus_decoder.c",
+	"opus/opusfile.c",
+	"opus/opus_multistream_encoder.c",
+	"opus/stream.c"
+]
+
+if("opus_fixed_point" in env and env.opus_fixed_point=="yes"):
+	env.Append(CPPPATH=["#drivers/opus/silk/fixed"], CFLAGS=["-DOPUS_FIXED_POINT"])
+	opus_sources_silk = [
+		"opus/silk/fixed/apply_sine_window_FIX.c",
+		"opus/silk/fixed/k2a_FIX.c",
+		"opus/silk/fixed/residual_energy16_FIX.c",
+		"opus/silk/fixed/autocorr_FIX.c",
+		"opus/silk/fixed/k2a_Q16_FIX.c",
+		"opus/silk/fixed/residual_energy_FIX.c",
+		"opus/silk/fixed/burg_modified_FIX.c",
+		"opus/silk/fixed/LTP_analysis_filter_FIX.c",
+		"opus/silk/fixed/schur64_FIX.c",
+		"opus/silk/fixed/corrMatrix_FIX.c",
+		"opus/silk/fixed/LTP_scale_ctrl_FIX.c",
+		"opus/silk/fixed/schur_FIX.c",
+		"opus/silk/fixed/encode_frame_FIX.c",
+		"opus/silk/fixed/noise_shape_analysis_FIX.c",
+		"opus/silk/fixed/solve_LS_FIX.c",
+		"opus/silk/fixed/find_LPC_FIX.c",
+		"opus/silk/fixed/pitch_analysis_core_FIX.c",
+		"opus/silk/fixed/vector_ops_FIX.c",
+		"opus/silk/fixed/find_LTP_FIX.c",
+		"opus/silk/fixed/prefilter_FIX.c",
+		"opus/silk/fixed/warped_autocorrelation_FIX.c",
+		"opus/silk/fixed/find_pitch_lags_FIX.c",
+		"opus/silk/fixed/process_gains_FIX.c",
+		"opus/silk/fixed/find_pred_coefs_FIX.c",
+		"opus/silk/fixed/regularize_correlations_FIX.c"
+	]
+else:
+	env.Append(CPPPATH=["#drivers/opus/silk/float"])
+	opus_sources_silk = [
+		"opus/silk/float/apply_sine_window_FLP.c",
+		"opus/silk/float/inner_product_FLP.c",
+		"opus/silk/float/regularize_correlations_FLP.c",
+		"opus/silk/float/autocorrelation_FLP.c",
+		"opus/silk/float/k2a_FLP.c",
+		"opus/silk/float/residual_energy_FLP.c",
+		"opus/silk/float/burg_modified_FLP.c",
+		"opus/silk/float/levinsondurbin_FLP.c",
+		"opus/silk/float/scale_copy_vector_FLP.c",
+		"opus/silk/float/bwexpander_FLP.c",
+		"opus/silk/float/LPC_analysis_filter_FLP.c",
+		"opus/silk/float/scale_vector_FLP.c",
+		"opus/silk/float/corrMatrix_FLP.c",
+		"opus/silk/float/LPC_inv_pred_gain_FLP.c",
+		"opus/silk/float/schur_FLP.c",
+		"opus/silk/float/encode_frame_FLP.c",
+		"opus/silk/float/LTP_analysis_filter_FLP.c",
+		"opus/silk/float/solve_LS_FLP.c",
+		"opus/silk/float/energy_FLP.c",
+		"opus/silk/float/LTP_scale_ctrl_FLP.c",
+		"opus/silk/float/sort_FLP.c",
+		"opus/silk/float/find_LPC_FLP.c",
+		"opus/silk/float/noise_shape_analysis_FLP.c",
+		"opus/silk/float/warped_autocorrelation_FLP.c",
+		"opus/silk/float/find_LTP_FLP.c",
+		"opus/silk/float/pitch_analysis_core_FLP.c",
+		"opus/silk/float/wrappers_FLP.c",
+		"opus/silk/float/find_pitch_lags_FLP.c",
+		"opus/silk/float/prefilter_FLP.c",
+		"opus/silk/float/find_pred_coefs_FLP.c",
+		"opus/silk/float/process_gains_FLP.c"
+	]
+
+
+opus_sources_lib+=opus_sources_silk
+env.drivers_sources+=opus_sources_lib
+env.drivers_sources+=opus_sources
+
+env.Append(CPPPATH=["#drivers/opus"])
+env.Append(CPPPATH=["#drivers/opus/celt","#drivers/opus/silk","#drivers/opus/silk/float"])
+env.Append(CFLAGS=["-DOPUS_HAVE_CONFIG_H"])
+
+Export('env')
diff --git a/drivers/opus/analysis.c b/drivers/opus/analysis.c
new file mode 100644
index 0000000000..47e8668b8e
--- /dev/null
+++ b/drivers/opus/analysis.c
@@ -0,0 +1,645 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "kiss_fft.h"
+#include "celt.h"
+#include "opus_modes.h"
+#include "arch.h"
+#include "quant_bands.h"
+#include <stdio.h>
+#include "analysis.h"
+#include "mlp.h"
+#include "stack_alloc.h"
+
+extern const MLP net;
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+static const float dct_table[128] = {
+        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
+        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
+        0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
+       -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
+        0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
+       -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
+        0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
+        0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
+        0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
+        0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
+        0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
+       -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
+        0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
+       -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
+        0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
+        0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
+};
+
+static const float analysis_window[240] = {
+      0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
+      0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
+      0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
+      0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
+      0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
+      0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
+      0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
+      0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
+      0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
+      0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
+      0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
+      0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
+      0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
+      0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
+      0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
+      0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
+      0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
+      0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
+      0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
+      0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
+      0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
+      0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
+      0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
+      0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
+      0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
+      0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
+      0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
+      0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
+      0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
+      0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
+};
+
+static const int tbands[NB_TBANDS+1] = {
+       2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
+};
+
+static const int extra_bands[NB_TOT_BANDS+1] = {
+      1, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
+};
+
+/*static const float tweight[NB_TBANDS+1] = {
+      .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
+};*/
+
+#define NB_TONAL_SKIP_BANDS 9
+
+#define cA 0.43157974f
+#define cB 0.67848403f
+#define cC 0.08595542f
+#define cE ((float)M_PI/2)
+static OPUS_INLINE float fast_atan2f(float y, float x) {
+   float x2, y2;
+   /* Should avoid underflow on the values we'll get */
+   if (ABS16(x)+ABS16(y)<1e-9f)
+   {
+      x*=1e12f;
+      y*=1e12f;
+   }
+   x2 = x*x;
+   y2 = y*y;
+   if(x2<y2){
+      float den = (y2 + cB*x2) * (y2 + cC*x2);
+      if (den!=0)
+         return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
+      else
+         return (y<0 ? -cE : cE);
+   }else{
+      float den = (x2 + cB*y2) * (x2 + cC*y2);
+      if (den!=0)
+         return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
+      else
+         return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
+   }
+}
+
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
+{
+   int pos;
+   int curr_lookahead;
+   float psum;
+   int i;
+
+   pos = tonal->read_pos;
+   curr_lookahead = tonal->write_pos-tonal->read_pos;
+   if (curr_lookahead<0)
+      curr_lookahead += DETECT_SIZE;
+
+   if (len > 480 && pos != tonal->write_pos)
+   {
+      pos++;
+      if (pos==DETECT_SIZE)
+         pos=0;
+   }
+   if (pos == tonal->write_pos)
+      pos--;
+   if (pos<0)
+      pos = DETECT_SIZE-1;
+   OPUS_COPY(info_out, &tonal->info[pos], 1);
+   tonal->read_subframe += len/120;
+   while (tonal->read_subframe>=4)
+   {
+      tonal->read_subframe -= 4;
+      tonal->read_pos++;
+   }
+   if (tonal->read_pos>=DETECT_SIZE)
+      tonal->read_pos-=DETECT_SIZE;
+
+   /* Compensate for the delay in the features themselves.
+      FIXME: Need a better estimate the 10 I just made up */
+   curr_lookahead = IMAX(curr_lookahead-10, 0);
+
+   psum=0;
+   /* Summing the probability of transition patterns that involve music at
+      time (DETECT_SIZE-curr_lookahead-1) */
+   for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
+      psum += tonal->pmusic[i];
+   for (;i<DETECT_SIZE;i++)
+      psum += tonal->pspeech[i];
+   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
+
+   info_out->music_prob = psum;
+}
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
+{
+    int i, b;
+    const kiss_fft_state *kfft;
+    VARDECL(kiss_fft_cpx, in);
+    VARDECL(kiss_fft_cpx, out);
+    int N = 480, N2=240;
+    float * OPUS_RESTRICT A = tonal->angle;
+    float * OPUS_RESTRICT dA = tonal->d_angle;
+    float * OPUS_RESTRICT d2A = tonal->d2_angle;
+    VARDECL(float, tonality);
+    VARDECL(float, noisiness);
+    float band_tonality[NB_TBANDS];
+    float logE[NB_TBANDS];
+    float BFCC[8];
+    float features[25];
+    float frame_tonality;
+    float max_frame_tonality;
+    /*float tw_sum=0;*/
+    float frame_noisiness;
+    const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
+    float slope=0;
+    float frame_stationarity;
+    float relativeE;
+    float frame_probs[2];
+    float alpha, alphaE, alphaE2;
+    float frame_loudness;
+    float bandwidth_mask;
+    int bandwidth=0;
+    float maxE = 0;
+    float noise_floor;
+    int remaining;
+    AnalysisInfo *info;
+    SAVE_STACK;
+
+    tonal->last_transition++;
+    alpha = 1.f/IMIN(20, 1+tonal->count);
+    alphaE = 1.f/IMIN(50, 1+tonal->count);
+    alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
+
+    if (tonal->count<4)
+       tonal->music_prob = .5;
+    kfft = celt_mode->mdct.kfft[0];
+    if (tonal->count==0)
+       tonal->mem_fill = 240;
+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
+    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
+    {
+       tonal->mem_fill += len;
+       /* Don't have enough to update the analysis */
+       RESTORE_STACK;
+       return;
+    }
+    info = &tonal->info[tonal->write_pos++];
+    if (tonal->write_pos>=DETECT_SIZE)
+       tonal->write_pos-=DETECT_SIZE;
+
+    ALLOC(in, 480, kiss_fft_cpx);
+    ALLOC(out, 480, kiss_fft_cpx);
+    ALLOC(tonality, 240, float);
+    ALLOC(noisiness, 240, float);
+    for (i=0;i<N2;i++)
+    {
+       float w = analysis_window[i];
+       in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
+       in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
+       in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
+       in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
+    }
+    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
+    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
+    tonal->mem_fill = 240 + remaining;
+    opus_fft(kfft, in, out);
+
+    for (i=1;i<N2;i++)
+    {
+       float X1r, X2r, X1i, X2i;
+       float angle, d_angle, d2_angle;
+       float angle2, d_angle2, d2_angle2;
+       float mod1, mod2, avg_mod;
+       X1r = (float)out[i].r+out[N-i].r;
+       X1i = (float)out[i].i-out[N-i].i;
+       X2r = (float)out[i].i+out[N-i].i;
+       X2i = (float)out[N-i].r-out[i].r;
+
+       angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
+       d_angle = angle - A[i];
+       d2_angle = d_angle - dA[i];
+
+       angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
+       d_angle2 = angle2 - angle;
+       d2_angle2 = d_angle2 - d_angle;
+
+       mod1 = d2_angle - (float)floor(.5+d2_angle);
+       noisiness[i] = ABS16(mod1);
+       mod1 *= mod1;
+       mod1 *= mod1;
+
+       mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
+       noisiness[i] += ABS16(mod2);
+       mod2 *= mod2;
+       mod2 *= mod2;
+
+       avg_mod = .25f*(d2A[i]+2.f*mod1+mod2);
+       tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
+
+       A[i] = angle2;
+       dA[i] = d_angle2;
+       d2A[i] = mod2;
+    }
+
+    frame_tonality = 0;
+    max_frame_tonality = 0;
+    /*tw_sum = 0;*/
+    info->activity = 0;
+    frame_noisiness = 0;
+    frame_stationarity = 0;
+    if (!tonal->count)
+    {
+       for (b=0;b<NB_TBANDS;b++)
+       {
+          tonal->lowE[b] = 1e10;
+          tonal->highE[b] = -1e10;
+       }
+    }
+    relativeE = 0;
+    frame_loudness = 0;
+    for (b=0;b<NB_TBANDS;b++)
+    {
+       float E=0, tE=0, nE=0;
+       float L1, L2;
+       float stationarity;
+       for (i=tbands[b];i<tbands[b+1];i++)
+       {
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef OPUS_FIXED_POINT
+          /* FIXME: It's probably best to change the BFCC filter initial state instead */
+          binE *= 5.55e-17f;
+#endif
+          E += binE;
+          tE += binE*tonality[i];
+          nE += binE*2.f*(.5f-noisiness[i]);
+       }
+       tonal->E[tonal->E_count][b] = E;
+       frame_noisiness += nE/(1e-15f+E);
+
+       frame_loudness += (float)sqrt(E+1e-10f);
+       logE[b] = (float)log(E+1e-10f);
+       tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
+       tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
+       if (tonal->highE[b] < tonal->lowE[b]+1.f)
+       {
+          tonal->highE[b]+=.5f;
+          tonal->lowE[b]-=.5f;
+       }
+       relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
+
+       L1=L2=0;
+       for (i=0;i<NB_FRAMES;i++)
+       {
+          L1 += (float)sqrt(tonal->E[i][b]);
+          L2 += tonal->E[i][b];
+       }
+
+       stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
+       stationarity *= stationarity;
+       stationarity *= stationarity;
+       frame_stationarity += stationarity;
+       /*band_tonality[b] = tE/(1e-15+E)*/;
+       band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
+#if 0
+       if (b>=NB_TONAL_SKIP_BANDS)
+       {
+          frame_tonality += tweight[b]*band_tonality[b];
+          tw_sum += tweight[b];
+       }
+#else
+       frame_tonality += band_tonality[b];
+       if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
+          frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
+#endif
+       max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality);
+       slope += band_tonality[b]*(b-8);
+       /*printf("%f %f ", band_tonality[b], stationarity);*/
+       tonal->prev_band_tonality[b] = band_tonality[b];
+    }
+
+    bandwidth_mask = 0;
+    bandwidth = 0;
+    maxE = 0;
+    noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef OPUS_FIXED_POINT
+    noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
+    noise_floor *= noise_floor;
+    for (b=0;b<NB_TOT_BANDS;b++)
+    {
+       float E=0;
+       int band_start, band_end;
+       /* Keep a margin of 300 Hz for aliasing */
+       band_start = extra_bands[b];
+       band_end = extra_bands[b+1];
+       for (i=band_start;i<band_end;i++)
+       {
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+          E += binE;
+       }
+       maxE = MAX32(maxE, E);
+       tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
+       E = MAX32(E, tonal->meanE[b]);
+       /* Use a simple follower with 13 dB/Bark slope for spreading function */
+       bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
+       /* Consider the band "active" only if all these conditions are met:
+          1) less than 10 dB below the simple follower
+          2) less than 90 dB below the peak band (maximal masking possible considering
+             both the ATH and the loudness-dependent slope of the spreading function)
+          3) above the PCM quantization noise floor
+       */
+       if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
+          bandwidth = b;
+    }
+    if (tonal->count<=2)
+       bandwidth = 20;
+    frame_loudness = 20*(float)log10(frame_loudness);
+    tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
+    tonal->lowECount *= (1-alphaE);
+    if (frame_loudness < tonal->Etracker-30)
+       tonal->lowECount += alphaE;
+
+    for (i=0;i<8;i++)
+    {
+       float sum=0;
+       for (b=0;b<16;b++)
+          sum += dct_table[i*16+b]*logE[b];
+       BFCC[i] = sum;
+    }
+
+    frame_stationarity /= NB_TBANDS;
+    relativeE /= NB_TBANDS;
+    if (tonal->count<10)
+       relativeE = .5;
+    frame_noisiness /= NB_TBANDS;
+#if 1
+    info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
+#else
+    info->activity = .5*(1+frame_noisiness-frame_stationarity);
+#endif
+    frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
+    frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
+    tonal->prev_tonality = frame_tonality;
+
+    slope /= 8*8;
+    info->tonality_slope = slope;
+
+    tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
+    tonal->count++;
+    info->tonality = frame_tonality;
+
+    for (i=0;i<4;i++)
+       features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i];
+
+    for (i=0;i<4;i++)
+       tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
+
+    for (i=0;i<4;i++)
+        features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]);
+    for (i=0;i<3;i++)
+        features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
+
+    if (tonal->count > 5)
+    {
+       for (i=0;i<9;i++)
+          tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
+    }
+
+    for (i=0;i<8;i++)
+    {
+       tonal->mem[i+24] = tonal->mem[i+16];
+       tonal->mem[i+16] = tonal->mem[i+8];
+       tonal->mem[i+8] = tonal->mem[i];
+       tonal->mem[i] = BFCC[i];
+    }
+    for (i=0;i<9;i++)
+       features[11+i] = (float)sqrt(tonal->std[i]);
+    features[20] = info->tonality;
+    features[21] = info->activity;
+    features[22] = frame_stationarity;
+    features[23] = info->tonality_slope;
+    features[24] = tonal->lowECount;
+
+#ifndef DISABLE_FLOAT_API
+    mlp_process(&net, features, frame_probs);
+    frame_probs[0] = .5f*(frame_probs[0]+1);
+    /* Curve fitting between the MLP probability and the actual probability */
+    frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
+    /* Probability of active audio (as opposed to silence) */
+    frame_probs[1] = .5f*frame_probs[1]+.5f;
+    /* Consider that silence has a 50-50 probability. */
+    frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
+
+    /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
+    {
+       /* Probability of state transition */
+       float tau;
+       /* Represents independence of the MLP probabilities, where
+          beta=1 means fully independent. */
+       float beta;
+       /* Denormalized probability of speech (p0) and music (p1) after update */
+       float p0, p1;
+       /* Probabilities for "all speech" and "all music" */
+       float s0, m0;
+       /* Probability sum for renormalisation */
+       float psum;
+       /* Instantaneous probability of speech and music, with beta pre-applied. */
+       float speech0;
+       float music0;
+
+       /* One transition every 3 minutes of active audio */
+       tau = .00005f*frame_probs[1];
+       beta = .05f;
+       if (1) {
+          /* Adapt beta based on how "unexpected" the new prob is */
+          float p, q;
+          p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+          q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+          beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
+       }
+       /* p0 and p1 are the probabilities of speech and music at this frame
+          using only information from previous frame and applying the
+          state transition model */
+       p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
+       p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
+       /* We apply the current probability with exponent beta to work around
+          the fact that the probability estimates aren't independent. */
+       p0 *= (float)pow(1-frame_probs[0], beta);
+       p1 *= (float)pow(frame_probs[0], beta);
+       /* Normalise the probabilities to get the Marokv probability of music. */
+       tonal->music_prob = p1/(p0+p1);
+       info->music_prob = tonal->music_prob;
+
+       /* This chunk of code deals with delayed decision. */
+       psum=1e-20f;
+       /* Instantaneous probability of speech and music, with beta pre-applied. */
+       speech0 = (float)pow(1-frame_probs[0], beta);
+       music0  = (float)pow(frame_probs[0], beta);
+       if (tonal->count==1)
+       {
+          tonal->pspeech[0]=.5;
+          tonal->pmusic [0]=.5;
+       }
+       /* Updated probability of having only speech (s0) or only music (m0),
+          before considering the new observation. */
+       s0 = tonal->pspeech[0] + tonal->pspeech[1];
+       m0 = tonal->pmusic [0] + tonal->pmusic [1];
+       /* Updates s0 and m0 with instantaneous probability. */
+       tonal->pspeech[0] = s0*(1-tau)*speech0;
+       tonal->pmusic [0] = m0*(1-tau)*music0;
+       /* Propagate the transition probabilities */
+       for (i=1;i<DETECT_SIZE-1;i++)
+       {
+          tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
+          tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
+       }
+       /* Probability that the latest frame is speech, when all the previous ones were music. */
+       tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
+       /* Probability that the latest frame is music, when all the previous ones were speech. */
+       tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
+
+       /* Renormalise probabilities to 1 */
+       for (i=0;i<DETECT_SIZE;i++)
+          psum += tonal->pspeech[i] + tonal->pmusic[i];
+       psum = 1.f/psum;
+       for (i=0;i<DETECT_SIZE;i++)
+       {
+          tonal->pspeech[i] *= psum;
+          tonal->pmusic [i] *= psum;
+       }
+       psum = tonal->pmusic[0];
+       for (i=1;i<DETECT_SIZE;i++)
+          psum += tonal->pspeech[i];
+
+       /* Estimate our confidence in the speech/music decisions */
+       if (frame_probs[1]>.75)
+       {
+          if (tonal->music_prob>.9)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->music_confidence_count);
+             tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
+             tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
+          }
+          if (tonal->music_prob<.1)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->speech_confidence_count);
+             tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
+             tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
+          }
+       } else {
+          if (tonal->music_confidence_count==0)
+             tonal->music_confidence = .9f;
+          if (tonal->speech_confidence_count==0)
+             tonal->speech_confidence = .1f;
+       }
+    }
+    if (tonal->last_music != (tonal->music_prob>.5f))
+       tonal->last_transition=0;
+    tonal->last_music = tonal->music_prob>.5f;
+#else
+    info->music_prob = 0;
+#endif
+    /*for (i=0;i<25;i++)
+       printf("%f ", features[i]);
+    printf("\n");*/
+
+    info->bandwidth = bandwidth;
+    /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
+    info->noisiness = frame_noisiness;
+    info->valid = 1;
+    if (info_out!=NULL)
+       OPUS_COPY(info_out, info, 1);
+    RESTORE_STACK;
+}
+
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+{
+   int offset;
+   int pcm_len;
+
+   if (analysis_pcm != NULL)
+   {
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      pcm_len = analysis_frame_size - analysis->analysis_offset;
+      offset = analysis->analysis_offset;
+      do {
+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+      } while (pcm_len>0);
+      analysis->analysis_offset = analysis_frame_size;
+
+      analysis->analysis_offset -= frame_size;
+   }
+
+   analysis_info->valid = 0;
+   tonality_get_info(analysis, analysis_info, frame_size);
+}
diff --git a/drivers/opus/analysis.h b/drivers/opus/analysis.h
new file mode 100644
index 0000000000..be0388faa3
--- /dev/null
+++ b/drivers/opus/analysis.h
@@ -0,0 +1,90 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ANALYSIS_H
+#define ANALYSIS_H
+
+#include "celt.h"
+#include "opus_private.h"
+
+#define NB_FRAMES 8
+#define NB_TBANDS 18
+#define NB_TOT_BANDS 21
+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
+
+#define DETECT_SIZE 200
+
+typedef struct {
+   float angle[240];
+   float d_angle[240];
+   float d2_angle[240];
+   opus_val32 inmem[ANALYSIS_BUF_SIZE];
+   int   mem_fill;                      /* number of usable samples in the buffer */
+   float prev_band_tonality[NB_TBANDS];
+   float prev_tonality;
+   float E[NB_FRAMES][NB_TBANDS];
+   float lowE[NB_TBANDS];
+   float highE[NB_TBANDS];
+   float meanE[NB_TOT_BANDS];
+   float mem[32];
+   float cmean[8];
+   float std[9];
+   float music_prob;
+   float Etracker;
+   float lowECount;
+   int E_count;
+   int last_music;
+   int last_transition;
+   int count;
+   float subframe_mem[3];
+   int analysis_offset;
+   /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
+       pspeech[0] is the probability that all frames in the window are speech. */
+   float pspeech[DETECT_SIZE];
+   /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
+       pmusic[0] is the probability that all frames in the window are music. */
+   float pmusic[DETECT_SIZE];
+   float speech_confidence;
+   float music_confidence;
+   int speech_confidence_count;
+   int music_confidence_count;
+   int write_pos;
+   int read_pos;
+   int read_subframe;
+   AnalysisInfo info[DETECT_SIZE];
+} TonalityAnalysisState;
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
+     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
+
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
+
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+
+#endif
diff --git a/drivers/opus/audio_stream_opus.cpp b/drivers/opus/audio_stream_opus.cpp
new file mode 100644
index 0000000000..eb9c81e152
--- /dev/null
+++ b/drivers/opus/audio_stream_opus.cpp
@@ -0,0 +1,376 @@
+/*************************************************************************/
+/*  audio_stream_opus.cpp                                                */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Author: George Marques <george@gmarqu.es>                             */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "audio_stream_opus.h"
+
+const float AudioStreamPlaybackOpus::osrate=48000.0f;
+
+int AudioStreamPlaybackOpus::_op_read_func(void *_stream, unsigned char *_ptr, int _nbytes) {
+	FileAccess *fa=(FileAccess*)_stream;
+
+	if(fa->eof_reached())
+		return 0;
+
+	uint8_t *dst = (uint8_t*)_ptr;
+
+	int read = fa->get_buffer(dst, _nbytes);
+
+	return read;
+}
+
+int AudioStreamPlaybackOpus::_op_seek_func(void *_stream, opus_int64 _offset, int _whence){
+
+#ifdef SEEK_SET
+	FileAccess *fa=(FileAccess*)_stream;
+
+	switch (_whence) {
+		case SEEK_SET: {
+			fa->seek(_offset);
+		} break;
+		case SEEK_CUR: {
+			fa->seek(fa->get_pos()+_offset);
+		} break;
+		case SEEK_END: {
+			fa->seek_end(_offset);
+		} break;
+		default: {
+			ERR_PRINT("BUG, wtf was whence set to?\n");
+		}
+	}
+	int ret=fa->eof_reached()?-1:0;
+	return ret;
+#else
+	return -1; // no seeking
+#endif
+}
+
+int AudioStreamPlaybackOpus::_op_close_func(void *_stream) {
+	if (!_stream)
+		return 0;
+	FileAccess *fa=(FileAccess*)_stream;
+	if (fa->is_open())
+		fa->close();
+	return 0;
+}
+
+opus_int64 AudioStreamPlaybackOpus::_op_tell_func(void *_stream) {
+	FileAccess *_fa = (FileAccess*)_stream;
+	return (opus_int64)_fa->get_pos();
+}
+
+void AudioStreamPlaybackOpus::_clear_stream() {
+	if(!stream_loaded)
+		return;
+
+	op_free(opus_file);
+	_close_file();
+
+	stream_loaded=false;
+	stream_channels=1;
+	playing=false;
+}
+
+void AudioStreamPlaybackOpus::_close_file() {
+	if (f) {
+		memdelete(f);
+		f=NULL;
+	}
+}
+
+Error AudioStreamPlaybackOpus::_load_stream() {
+
+	ERR_FAIL_COND_V(!stream_valid,ERR_UNCONFIGURED);
+
+	_clear_stream();
+	if (file=="")
+		return ERR_INVALID_DATA;
+
+	Error err;
+	f=FileAccess::open(file,FileAccess::READ,&err);
+
+	if (err) {
+		ERR_FAIL_COND_V( err, err );
+	}
+
+	int _err = 0;
+
+	opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&_err);
+
+	switch (_err) {
+		case OP_EREAD: { // - Can't read the file.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CANT_READ );
+		} break;
+		case OP_EVERSION: // - Unrecognized version number.
+		case OP_ENOTFORMAT: // - Stream is not Opus data.
+		case OP_EIMPL : { // - Stream used non-implemented feature.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
+		} break;
+		case OP_EBADLINK: // - Failed to find old data after seeking.
+		case OP_EBADTIMESTAMP: // - Timestamp failed the validity checks.
+		case OP_EBADHEADER: { // - Invalid or mising Opus bitstream header.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CORRUPT );
+		} break;
+		case OP_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_BUG );
+		} break;
+	}
+	repeats=0;
+	stream_loaded=true;
+
+
+	return OK;
+}
+
+AudioStreamPlaybackOpus::AudioStreamPlaybackOpus() {
+	loops=false;
+	playing=false;
+	f = NULL;
+	stream_loaded=false;
+	stream_valid=false;
+	repeats=0;
+	paused=true;
+	stream_channels=0;
+	current_section=0;
+	length=0;
+	loop_restart_time=0;
+	pre_skip=0;
+
+	_op_callbacks.read = _op_read_func;
+	_op_callbacks.seek = _op_seek_func;
+	_op_callbacks.tell = _op_tell_func;
+	_op_callbacks.close = _op_close_func;
+}
+
+Error AudioStreamPlaybackOpus::set_file(const String &p_file) {
+	file=p_file;
+	stream_valid=false;
+	Error err;
+	f=FileAccess::open(file,FileAccess::READ,&err);
+
+	if (err) {
+		ERR_FAIL_COND_V( err, err );
+	}
+
+	int _err;
+
+	opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&_err);
+
+	switch (_err) {
+		case OP_EREAD: { // - Can't read the file.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CANT_READ );
+		} break;
+		case OP_EVERSION: // - Unrecognized version number.
+		case OP_ENOTFORMAT: // - Stream is not Opus data.
+		case OP_EIMPL : { // - Stream used non-implemented feature.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
+		} break;
+		case OP_EBADLINK: // - Failed to find old data after seeking.
+		case OP_EBADTIMESTAMP: // - Timestamp failed the validity checks.
+		case OP_EBADHEADER: { // - Invalid or mising Opus bitstream header.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CORRUPT );
+		} break;
+		case OP_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_BUG );
+		} break;
+	}
+
+	const OpusHead *oinfo = op_head(opus_file,-1);
+
+	stream_channels=oinfo->channel_count;
+	pre_skip=oinfo->pre_skip;
+	frames_mixed=pre_skip;
+	ogg_int64_t len = op_pcm_total(opus_file,-1);
+	if(len < 0) {
+		length = 0;
+	} else {
+		length=(len/osrate);
+	}
+
+	op_free(opus_file);
+	memdelete(f);
+	f=NULL;
+	stream_valid=true;
+
+
+	return OK;
+}
+
+void AudioStreamPlaybackOpus::play(float p_from) {
+	if (playing)
+		stop();
+
+	if (_load_stream()!=OK)
+		return;
+
+	frames_mixed=pre_skip;
+	playing=true;
+	if (p_from>0) {
+		seek_pos(p_from);
+	}
+}
+
+void AudioStreamPlaybackOpus::stop() {
+	_clear_stream();
+	playing=false;
+}
+
+void AudioStreamPlaybackOpus::seek_pos(float p_time) {
+	if(!playing) return;
+	ogg_int64_t pcm_offset = (ogg_int64_t)(p_time * osrate);
+	bool ok = op_pcm_seek(opus_file,pcm_offset)==0;
+	if(!ok) {
+		ERR_PRINT("Seek time over stream size.");
+		return;
+	}
+	frames_mixed=osrate*p_time;
+}
+
+int AudioStreamPlaybackOpus::mix(int16_t* p_bufer,int p_frames) {
+	if (!playing)
+		return 0;
+
+	int total=p_frames;
+
+	while (true) {
+
+		int todo = p_frames;
+
+		if (todo==0 || todo<MIN_MIX) {
+			break;
+		}
+
+		int ret=op_read(opus_file,(opus_int16*)p_bufer,todo*stream_channels,&current_section);
+		if (ret<0) {
+			playing = false;
+			ERR_EXPLAIN("Error reading Opus File: "+file);
+			ERR_BREAK(ret<0);
+		} else if (ret==0) { // end of song, reload?
+			op_free(opus_file);
+
+			_close_file();
+
+			f=FileAccess::open(file,FileAccess::READ);
+
+			int errv = 0;
+			opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&errv);
+			if (errv!=0) {
+				playing=false;
+				break; // :(
+			}
+
+			if (!has_loop()) {
+				playing=false;
+				repeats=1;
+				break;
+			}
+
+			if (loop_restart_time) {
+				bool ok = op_pcm_seek(opus_file, (loop_restart_time*osrate)+pre_skip)==0;
+				if (!ok) {
+					playing=false;
+					ERR_PRINT("loop restart time rejected")
+				}
+
+				frames_mixed=(loop_restart_time*osrate)+pre_skip;
+			} else {
+				frames_mixed=pre_skip;
+			}
+			repeats++;
+			continue;
+
+		}
+
+		stream_channels=op_head(opus_file,current_section)->channel_count;
+
+		frames_mixed+=ret;
+
+		p_bufer+=ret*stream_channels;
+		p_frames-=ret;
+
+	}
+
+	return total-p_frames;
+}
+
+float AudioStreamPlaybackOpus::get_length() const {
+	if(!stream_loaded) {
+		if(const_cast<AudioStreamPlaybackOpus*>(this)->_load_stream() != OK)
+			return 0;
+	}
+	return length;
+}
+
+float AudioStreamPlaybackOpus::get_pos() const {
+
+	int32_t frames = int32_t(frames_mixed);
+	if (frames < 0)
+		frames=0;
+	return double(frames) / osrate;
+}
+
+int AudioStreamPlaybackOpus::get_minimum_buffer_size() const {
+	return MIN_MIX;
+}
+
+AudioStreamPlaybackOpus::~AudioStreamPlaybackOpus() {
+	_clear_stream();
+}
+
+RES ResourceFormatLoaderAudioStreamOpus::load(const String &p_path, const String& p_original_path, Error *r_error) {
+	if (r_error)
+		*r_error=OK;
+
+	AudioStreamOpus *opus_stream = memnew(AudioStreamOpus);
+	opus_stream->set_file(p_path);
+	return Ref<AudioStreamOpus>(opus_stream);
+}
+
+void ResourceFormatLoaderAudioStreamOpus::get_recognized_extensions(List<String> *p_extensions) const {
+
+	p_extensions->push_back("opus");
+}
+String ResourceFormatLoaderAudioStreamOpus::get_resource_type(const String &p_path) const {
+
+	if (p_path.extension().to_lower()=="opus")
+		return "AudioStreamOpus";
+	return "";
+}
+
+bool ResourceFormatLoaderAudioStreamOpus::handles_type(const String& p_type) const {
+	return (p_type=="AudioStream" || p_type=="AudioStreamOpus");
+}
diff --git a/drivers/opus/audio_stream_opus.h b/drivers/opus/audio_stream_opus.h
new file mode 100644
index 0000000000..2f173cc270
--- /dev/null
+++ b/drivers/opus/audio_stream_opus.h
@@ -0,0 +1,141 @@
+/*************************************************************************/
+/*  audio_stream_opus.h                                                  */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Author: George Marques <george@gmarqu.es>                             */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef AUDIO_STREAM_OPUS_H
+#define AUDIO_STREAM_OPUS_H
+
+#include "scene/resources/audio_stream.h"
+#include "opus/opusfile.h"
+#include "opus/internal.h"
+#include "os/file_access.h"
+#include "io/resource_loader.h"
+
+class AudioStreamPlaybackOpus : public AudioStreamPlayback {
+
+	OBJ_TYPE(AudioStreamPlaybackOpus,AudioStreamPlayback)
+
+	enum {
+		MIN_MIX=1024
+	};
+
+	FileAccess *f;
+
+	OpusFileCallbacks _op_callbacks;
+	float length;
+	static int _op_read_func(void *_stream, unsigned char *_ptr, int _nbytes);
+	static int _op_seek_func(void *_stream, opus_int64 _offset, int _whence);
+	static int _op_close_func(void *_stream);
+	static opus_int64 _op_tell_func(void *_stream);
+	static const float osrate;
+
+	String file;
+	int64_t frames_mixed;
+
+	bool stream_loaded;
+	volatile bool playing;
+	OggOpusFile *opus_file;
+	int stream_channels;
+	int current_section;
+	int pre_skip;
+
+	bool paused;
+	bool loops;
+	int repeats;
+
+	Error _load_stream();
+	void _clear_stream();
+	void _close_file();
+
+	bool stream_valid;
+	float loop_restart_time;
+
+public:
+	Error set_file(const String& p_file);
+
+	virtual void play(float p_from=0);
+	virtual void stop();
+	virtual bool is_playing() const { return playing; }
+
+	virtual void set_loop_restart_time(float p_time) { loop_restart_time=p_time; }
+
+	virtual void set_paused(bool p_paused) { paused=p_paused; }
+	virtual bool is_paused() const { return paused; }
+
+	virtual void set_loop(bool p_enable) { loops=p_enable; }
+	virtual bool has_loop() const {return loops; }
+
+	virtual float get_length() const;
+
+	virtual String get_stream_name() const { return ""; }
+
+	virtual int get_loop_count() const { return repeats; }
+
+	virtual float get_pos() const;
+	virtual void seek_pos(float p_time);
+
+	virtual int get_channels() const { return stream_channels; }
+	virtual int get_mix_rate() const { return osrate; }
+
+	virtual int get_minimum_buffer_size() const;
+
+	virtual int mix(int16_t* p_bufer,int p_frames);
+
+	AudioStreamPlaybackOpus();
+	~AudioStreamPlaybackOpus();
+};
+
+
+class AudioStreamOpus: public AudioStream {
+
+	OBJ_TYPE(AudioStreamOpus,AudioStream)
+
+	String file;
+public:
+
+	Ref<AudioStreamPlayback> instance_playback() {
+		Ref<AudioStreamPlaybackOpus> pb = memnew( AudioStreamPlaybackOpus );
+		pb->set_file(file);
+		return pb;
+	}
+
+	void set_file(const String& p_file) { file=p_file; }
+
+};
+
+class ResourceFormatLoaderAudioStreamOpus: public ResourceFormatLoader {
+public:
+	virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
+	virtual void get_recognized_extensions(List<String> *p_extensions) const;
+	virtual bool handles_type(const String& p_type) const;
+	virtual String get_resource_type(const String &p_path) const;
+};
+
+#endif // AUDIO_STREAM_OPUS_H
diff --git a/drivers/opus/celt/_kiss_fft_guts.h b/drivers/opus/celt/_kiss_fft_guts.h
new file mode 100644
index 0000000000..21bea8a9b0
--- /dev/null
+++ b/drivers/opus/celt/_kiss_fft_guts.h
@@ -0,0 +1,183 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_GUTS_H
+#define KISS_FFT_GUTS_H
+
+#define MIN(a,b) ((a)<(b) ? (a):(b))
+#define MAX(a,b) ((a)>(b) ? (a):(b))
+
+/* kiss_fft.h
+   defines kiss_fft_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#include "kiss_fft.h"
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef OPUS_FIXED_POINT
+#include "arch.h"
+
+
+#define SAMP_MAX 2147483647
+#define TWID_MAX 32767
+#define TRIG_UPSCALE 1
+
+#define SAMP_MIN -SAMP_MAX
+
+
+#   define S_MUL(a,b) MULT16_32_Q15(b, a)
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
+
+#   define C_MULC(m,a,b) \
+      do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
+
+#   define C_MUL4(m,a,b) \
+      do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
+          (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
+
+#   define C_MULBYSCALAR( c, s ) \
+      do{ (c).r =  S_MUL( (c).r , s ) ;\
+          (c).i =  S_MUL( (c).i , s ) ; }while(0)
+
+#   define DIVSCALAR(x,k) \
+        (x) = S_MUL(  x, (TWID_MAX-((k)>>1))/(k)+1 )
+
+#   define C_FIXDIV(c,div) \
+        do {    DIVSCALAR( (c).r , div);  \
+                DIVSCALAR( (c).i  , div); }while (0)
+
+#define  C_ADD( res, a,b)\
+    do {(res).r=ADD32((a).r,(b).r);  (res).i=ADD32((a).i,(b).i); \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do {(res).r=SUB32((a).r,(b).r);  (res).i=SUB32((a).i,(b).i); \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do {(res).r = ADD32((res).r, (a).r);  (res).i = ADD32((res).i,(a).i);\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {(res).r = ADD32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
+    }while(0)
+
+#if defined(OPUS_ARM_INLINE_ASM)
+#include "arm/kiss_fft_armv4.h"
+#endif
+
+#if defined(OPUS_ARM_INLINE_EDSP)
+#include "arm/kiss_fft_armv5e.h"
+#endif
+
+#else  /* not OPUS_FIXED_POINT*/
+
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#define C_MULC(m,a,b) \
+    do{ (m).r = (a).r*(b).r + (a).i*(b).i;\
+        (m).i = (a).i*(b).r - (a).r*(b).i; }while(0)
+
+#define C_MUL4(m,a,b) C_MUL(m,a,b)
+
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#ifndef C_ADD
+#define  C_ADD( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+            (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do { \
+            CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+            CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+            (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do { \
+            CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+            (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {\
+            CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+            CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+            (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+#endif /* C_ADD defined */
+
+#ifdef OPUS_FIXED_POINT
+#  define KISS_FFT_COS(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
+#  define KISS_FFT_SIN(phase)  TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))
+#  define KISS_FFT_COS(phase)  floor(.5+TWID_MAX*cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+TWID_MAX*sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5f))
+#else
+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*.5f)
+#endif
+
+#define  kf_cexp(x,phase) \
+        do{ \
+                (x)->r = KISS_FFT_COS(phase);\
+                (x)->i = KISS_FFT_SIN(phase);\
+        }while(0)
+
+#define  kf_cexp2(x,phase) \
+   do{ \
+      (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
+      (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
+}while(0)
+
+#endif /* KISS_FFT_GUTS_H */
diff --git a/drivers/opus/celt/arch.h b/drivers/opus/celt/arch.h
new file mode 100644
index 0000000000..83e3705000
--- /dev/null
+++ b/drivers/opus/celt/arch.h
@@ -0,0 +1,214 @@
+/* Copyright (c) 2003-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file arch.h
+   @brief Various architecture definitions for CELT
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ARCH_H
+#define ARCH_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+# if !defined(__GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define __GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define __GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+#define CELT_SIG_SCALE 32768.f
+
+#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
+#ifdef ENABLE_ASSERTIONS
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
+{
+   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+   abort();
+}
+#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
+#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
+#else
+#define celt_assert(cond)
+#define celt_assert2(cond, message)
+#endif
+
+#define IMUL32(a,b) ((a)*(b))
+
+#define ABS(x) ((x) < 0 ? (-(x)) : (x))      /**< Absolute integer value. */
+#define ABS16(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 16-bit value.  */
+#define MIN16(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 16-bit value.   */
+#define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
+#define ABS32(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 32-bit value.  */
+#define MIN32(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum 32-bit value.   */
+#define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
+#define IMIN(a,b) ((a) < (b) ? (a) : (b))   /**< Minimum int value.   */
+#define IMAX(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum int value.   */
+#define UADD32(a,b) ((a)+(b))
+#define USUB32(a,b) ((a)-(b))
+
+#define PRINT_MIPS(file)
+
+#ifdef OPUS_FIXED_POINT
+
+typedef opus_int16 opus_val16;
+typedef opus_int32 opus_val32;
+
+typedef opus_val32 celt_sig;
+typedef opus_val16 celt_norm;
+typedef opus_val32 celt_ener;
+
+#define Q15ONE 32767
+
+#define SIG_SHIFT 12
+
+#define NORM_SCALING 16384
+
+#define DB_SHIFT 10
+
+#define EPSILON 1
+#define VERY_SMALL 0
+#define VERY_LARGE16 ((opus_val16)32767)
+#define Q15_ONE ((opus_val16)32767)
+
+#define SCALEIN(a)      (a)
+#define SCALEOUT(a)     (a)
+
+#ifdef FIXED_DEBUG
+#include "fixed_debug.h"
+#else
+
+#include "fixed_generic.h"
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/fixed_armv5e.h"
+#elif defined (OPUS_ARM_INLINE_ASM)
+#include "arm/fixed_armv4.h"
+#elif defined (BFIN_ASM)
+#include "fixed_bfin.h"
+#elif defined (TI_C5X_ASM)
+#include "fixed_c5x.h"
+#elif defined (TI_C6X_ASM)
+#include "fixed_c6x.h"
+#endif
+
+#endif
+
+#else /* OPUS_FIXED_POINT */
+
+typedef float opus_val16;
+typedef float opus_val32;
+
+typedef float celt_sig;
+typedef float celt_norm;
+typedef float celt_ener;
+
+#define Q15ONE 1.0f
+
+#define NORM_SCALING 1.f
+
+#define EPSILON 1e-15f
+#define VERY_SMALL 1e-30f
+#define VERY_LARGE16 1e15f
+#define Q15_ONE ((opus_val16)1.f)
+
+#define QCONST16(x,bits) (x)
+#define QCONST32(x,bits) (x)
+
+#define NEG16(x) (-(x))
+#define NEG32(x) (-(x))
+#define EXTRACT16(x) (x)
+#define EXTEND32(x) (x)
+#define SHR16(a,shift) (a)
+#define SHL16(a,shift) (a)
+#define SHR32(a,shift) (a)
+#define SHL32(a,shift) (a)
+#define PSHR32(a,shift) (a)
+#define VSHR32(a,shift) (a)
+
+#define PSHR(a,shift)   (a)
+#define SHR(a,shift)    (a)
+#define SHL(a,shift)    (a)
+#define SATURATE(x,a)   (x)
+#define SATURATE16(x)   (x)
+
+#define ROUND16(a,shift)  (a)
+#define HALF16(x)       (.5f*(x))
+#define HALF32(x)       (.5f*(x))
+
+#define ADD16(a,b) ((a)+(b))
+#define SUB16(a,b) ((a)-(b))
+#define ADD32(a,b) ((a)+(b))
+#define SUB32(a,b) ((a)-(b))
+#define MULT16_16_16(a,b)     ((a)*(b))
+#define MULT16_16(a,b)     ((opus_val32)(a)*(opus_val32)(b))
+#define MAC16_16(c,a,b)     ((c)+(opus_val32)(a)*(opus_val32)(b))
+
+#define MULT16_32_Q15(a,b)     ((a)*(b))
+#define MULT16_32_Q16(a,b)     ((a)*(b))
+
+#define MULT32_32_Q31(a,b)     ((a)*(b))
+
+#define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
+
+#define MULT16_16_Q11_32(a,b)     ((a)*(b))
+#define MULT16_16_Q11(a,b)     ((a)*(b))
+#define MULT16_16_Q13(a,b)     ((a)*(b))
+#define MULT16_16_Q14(a,b)     ((a)*(b))
+#define MULT16_16_Q15(a,b)     ((a)*(b))
+#define MULT16_16_P15(a,b)     ((a)*(b))
+#define MULT16_16_P13(a,b)     ((a)*(b))
+#define MULT16_16_P14(a,b)     ((a)*(b))
+#define MULT16_32_P16(a,b)     ((a)*(b))
+
+#define DIV32_16(a,b)     (((opus_val32)(a))/(opus_val16)(b))
+#define DIV32(a,b)     (((opus_val32)(a))/(opus_val32)(b))
+
+#define SCALEIN(a)      ((a)*CELT_SIG_SCALE)
+#define SCALEOUT(a)     ((a)*(1/CELT_SIG_SCALE))
+
+#endif /* !OPUS_FIXED_POINT */
+
+#ifndef GLOBAL_STACK_SIZE
+#ifdef OPUS_FIXED_POINT
+#define GLOBAL_STACK_SIZE 100000
+#else
+#define GLOBAL_STACK_SIZE 100000
+#endif
+#endif
+
+#endif /* ARCH_H */
diff --git a/drivers/opus/celt/arm/arm2gnu.pl b/drivers/opus/celt/arm/arm2gnu.pl
new file mode 100755
index 0000000000..eab42efa2b
--- /dev/null
+++ b/drivers/opus/celt/arm/arm2gnu.pl
@@ -0,0 +1,316 @@
+#!/usr/bin/perl
+
+my $bigend;  # little/big endian
+my $nxstack;
+
+$nxstack = 0;
+
+eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
+    if $running_under_some_shell;
+
+while ($ARGV[0] =~ /^-/) {
+    $_ = shift;
+  last if /^--/;
+    if (/^-n/) {
+    $nflag++;
+    next;
+    }
+    die "I don't recognize this switch: $_\\n";
+}
+$printit++ unless $nflag;
+
+$\ = "\n";      # automatically add newline on print
+$n=0;
+
+$thumb = 0;     # ARM mode by default, not Thumb.
+@proc_stack = ();
+
+LINE:
+while (<>) {
+
+    # For ADRLs we need to add a new line after the substituted one.
+    $addPadding = 0;
+
+    # First, we do not dare to touch *anything* inside double quotes, do we?
+    # Second, if you want a dollar character in the string,
+    # insert two of them -- that's how ARM C and assembler treat strings.
+    s/^([A-Za-z_]\w*)[ \t]+DCB[ \t]*\"/$1:   .ascii \"/   && do { s/\$\$/\$/g; next };
+    s/\bDCB\b[ \t]*\"/.ascii \"/                          && do { s/\$\$/\$/g; next };
+    s/^(\S+)\s+RN\s+(\S+)/$1 .req r$2/                    && do { s/\$\$/\$/g; next };
+    # If there's nothing on a line but a comment, don't try to apply any further
+    #  substitutions (this is a cheap hack to avoid mucking up the license header)
+    s/^([ \t]*);/$1@/                                     && do { s/\$\$/\$/g; next };
+    # If substituted -- leave immediately !
+
+    s/@/,:/;
+    s/;/@/;
+    while ( /@.*'/ ) {
+      s/(@.*)'/$1/g;
+    }
+    s/\{FALSE\}/0/g;
+    s/\{TRUE\}/1/g;
+    s/\{(\w\w\w\w+)\}/$1/g;
+    s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
+    s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
+    s/\bIMPORT\b/.extern/;
+    s/\bEXPORT\b/.global/;
+    s/^(\s+)\[/$1IF/;
+    s/^(\s+)\|/$1ELSE/;
+    s/^(\s+)\]/$1ENDIF/;
+    s/IF *:DEF:/ .ifdef/;
+    s/IF *:LNOT: *:DEF:/ .ifndef/;
+    s/ELSE/ .else/;
+    s/ENDIF/ .endif/;
+
+    if( /\bIF\b/ ) {
+      s/\bIF\b/ .if/;
+      s/=/==/;
+    }
+    if ( $n == 2) {
+        s/\$/\\/g;
+    }
+    if ($n == 1) {
+        s/\$//g;
+        s/label//g;
+    $n = 2;
+      }
+    if ( /MACRO/ ) {
+      s/MACRO *\n/.macro/;
+      $n=1;
+    }
+    if ( /\bMEND\b/ ) {
+      s/\bMEND\b/.endm/;
+      $n=0;
+    }
+
+    # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there.
+    #
+    if ( /\bAREA\b/ ) {
+        my $align;
+        $align = "2";
+        if ( /ALIGN=(\d+)/ ) {
+            $align = $1;
+        }
+        if ( /CODE/ ) {
+            $nxstack = 1;
+        }
+        s/^(.+)CODE(.+)READONLY(.*)/    .text/;
+        s/^(.+)DATA(.+)READONLY(.*)/    .section .rdata/;
+        s/^(.+)\|\|\.data\|\|(.+)/    .data/;
+        s/^(.+)\|\|\.bss\|\|(.+)/    .bss/;
+        s/$/;   .p2align $align/;
+        # Enable NEON instructions but don't produce a binary that requires
+        # ARMv7. RVCT does not have equivalent directives, so we just do this
+        # for all CODE areas.
+        if ( /.text/ ) {
+            # Separating .arch, .fpu, etc., by semicolons does not work (gas
+            # thinks the semicolon is part of the arch name, even when there's
+            # whitespace separating them). Sadly this means our line numbers
+            # won't match the original source file (we could use the .line
+            # directive, which is documented to be obsolete, but then gdb will
+            # show the wrong line in the translated source file).
+            s/$/;   .arch armv7-a\n   .fpu neon\n   .object_arch armv4t/;
+        }
+    }
+
+    s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/;       # ||.constdata$3||
+    s/\|\|\.bss\$(\d+)\|\|/.L_BSS$1/;               # ||.bss$2||
+    s/\|\|\.data\$(\d+)\|\|/.L_DATA$1/;             # ||.data$2||
+    s/\|\|([a-zA-Z0-9_]+)\@([a-zA-Z0-9_]+)\|\|/@ $&/;
+    s/^(\s+)\%(\s)/    .space $1/;
+
+    s/\|(.+)\.(\d+)\|/\.$1_$2/;                     # |L80.123| -> .L80_123
+    s/\bCODE32\b/.code 32/ && do {$thumb = 0};
+    s/\bCODE16\b/.code 16/ && do {$thumb = 1};
+    if (/\bPROC\b/)
+    {
+        my $prefix;
+        my $proc;
+        /^([A-Za-z_\.]\w+)\b/;
+        $proc = $1;
+        $prefix = "";
+        if ($proc)
+        {
+            $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc);
+            push(@proc_stack, $proc);
+            s/^[A-Za-z_\.]\w+/$&:/;
+        }
+        $prefix = $prefix."\t.thumb_func; " if ($thumb);
+        s/\bPROC\b/@ $&/;
+        $_ = $prefix.$_;
+    }
+    s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/;
+    s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/;
+    if (/\bENDP\b/)
+    {
+        my $proc;
+        s/\bENDP\b/@ $&/;
+        $proc = pop(@proc_stack);
+        $_ = "\t.size $proc, .-$proc".$_ if ($proc);
+    }
+    s/\bSUBT\b/@ $&/;
+    s/\bDATA\b/@ $&/;   # DATA directive is deprecated -- Asm guide, p.7-25
+    s/\bKEEP\b/@ $&/;
+    s/\bEXPORTAS\b/@ $&/;
+    s/\|\|(.)+\bEQU\b/@ $&/;
+    s/\|\|([\w\$]+)\|\|/$1/;
+    s/\bENTRY\b/@ $&/;
+    s/\bASSERT\b/@ $&/;
+    s/\bGBLL\b/@ $&/;
+    s/\bGBLA\b/@ $&/;
+    s/^\W+OPT\b/@ $&/;
+    s/:OR:/|/g;
+    s/:SHL:/<</g;
+    s/:SHR:/>>/g;
+    s/:AND:/&/g;
+    s/:LAND:/&&/g;
+    s/CPSR/cpsr/;
+    s/SPSR/spsr/;
+    s/ALIGN$/.balign 4/;
+    s/ALIGN\s+([0-9x]+)$/.balign $1/;
+    s/psr_cxsf/psr_all/;
+    s/LTORG/.ltorg/;
+    s/^([A-Za-z_]\w*)[ \t]+EQU/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+SETL/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+SETA/ .set $1,/;
+    s/^([A-Za-z_]\w*)[ \t]+\*/ .set $1,/;
+
+    #  {PC} + 0xdeadfeed  -->  . + 0xdeadfeed
+    s/\{PC\} \+/ \. +/;
+
+    # Single hex constant on the line !
+    #
+    # >>> NOTE <<<
+    #   Double-precision floats in gcc are always mixed-endian, which means
+    #   bytes in two words are little-endian, but words are big-endian.
+    #   So, 0x0000deadfeed0000 would be stored as 0x0000dead at low address
+    #   and 0xfeed0000 at high address.
+    #
+    s/\bDCFD\b[ \t]+0x([a-fA-F0-9]{8})([a-fA-F0-9]{8})/.long 0x$1, 0x$2/;
+    # Only decimal constants on the line, no hex !
+    s/\bDCFD\b[ \t]+([0-9\.\-]+)/.double $1/;
+
+    # Single hex constant on the line !
+#    s/\bDCFS\b[ \t]+0x([a-f0-9]{8})([a-f0-9]{8})/.long 0x$1, 0x$2/;
+    # Only decimal constants on the line, no hex !
+#    s/\bDCFS\b[ \t]+([0-9\.\-]+)/.double $1/;
+    s/\bDCFS[ \t]+0x/.word 0x/;
+    s/\bDCFS\b/.float/;
+
+    s/^([A-Za-z_]\w*)[ \t]+DCD/$1 .word/;
+    s/\bDCD\b/.word/;
+    s/^([A-Za-z_]\w*)[ \t]+DCW/$1 .short/;
+    s/\bDCW\b/.short/;
+    s/^([A-Za-z_]\w*)[ \t]+DCB/$1 .byte/;
+    s/\bDCB\b/.byte/;
+    s/^([A-Za-z_]\w*)[ \t]+\%/.comm $1,/;
+    s/^[A-Za-z_\.]\w+/$&:/;
+    s/^(\d+)/$1:/;
+    s/\%(\d+)/$1b_or_f/;
+    s/\%[Bb](\d+)/$1b/;
+    s/\%[Ff](\d+)/$1f/;
+    s/\%[Ff][Tt](\d+)/$1f/;
+    s/&([\dA-Fa-f]+)/0x$1/;
+    if ( /\b2_[01]+\b/ ) {
+      s/\b2_([01]+)\b/conv$1&&&&/g;
+      while ( /[01][01][01][01]&&&&/ ) {
+        s/0000&&&&/&&&&0/g;
+        s/0001&&&&/&&&&1/g;
+        s/0010&&&&/&&&&2/g;
+        s/0011&&&&/&&&&3/g;
+        s/0100&&&&/&&&&4/g;
+        s/0101&&&&/&&&&5/g;
+        s/0110&&&&/&&&&6/g;
+        s/0111&&&&/&&&&7/g;
+        s/1000&&&&/&&&&8/g;
+        s/1001&&&&/&&&&9/g;
+        s/1010&&&&/&&&&A/g;
+        s/1011&&&&/&&&&B/g;
+        s/1100&&&&/&&&&C/g;
+        s/1101&&&&/&&&&D/g;
+        s/1110&&&&/&&&&E/g;
+        s/1111&&&&/&&&&F/g;
+      }
+      s/000&&&&/&&&&0/g;
+      s/001&&&&/&&&&1/g;
+      s/010&&&&/&&&&2/g;
+      s/011&&&&/&&&&3/g;
+      s/100&&&&/&&&&4/g;
+      s/101&&&&/&&&&5/g;
+      s/110&&&&/&&&&6/g;
+      s/111&&&&/&&&&7/g;
+      s/00&&&&/&&&&0/g;
+      s/01&&&&/&&&&1/g;
+      s/10&&&&/&&&&2/g;
+      s/11&&&&/&&&&3/g;
+      s/0&&&&/&&&&0/g;
+      s/1&&&&/&&&&1/g;
+      s/conv&&&&/0x/g;
+    }
+
+    if ( /commandline/)
+    {
+        if( /-bigend/)
+        {
+            $bigend=1;
+        }
+    }
+
+    if ( /\bDCDU\b/ )
+    {
+        my $cmd=$_;
+        my $value;
+        my $prefix;
+        my $w1;
+        my $w2;
+        my $w3;
+        my $w4;
+
+        s/\s+DCDU\b/@ $&/;
+
+        $cmd =~ /\bDCDU\b\s+0x(\d+)/;
+        $value = $1;
+        $value =~ /(\w\w)(\w\w)(\w\w)(\w\w)/;
+        $w1 = $1;
+        $w2 = $2;
+        $w3 = $3;
+        $w4 = $4;
+
+        if( $bigend ne "")
+        {
+            # big endian
+            $prefix = "\t.byte\t0x".$w1.";".
+                      "\t.byte\t0x".$w2.";".
+                      "\t.byte\t0x".$w3.";".
+                      "\t.byte\t0x".$w4."; ";
+        }
+        else
+        {
+            # little endian
+            $prefix = "\t.byte\t0x".$w4.";".
+                      "\t.byte\t0x".$w3.";".
+                      "\t.byte\t0x".$w2.";".
+                      "\t.byte\t0x".$w1."; ";
+        }
+        $_=$prefix.$_;
+    }
+
+    if ( /\badrl\b/i )
+    {
+        s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i;
+        $addPadding = 1;
+    }
+    s/\bEND\b/@ END/;
+} continue {
+    printf ("%s", $_) if $printit;
+    if ($addPadding != 0)
+    {
+        printf ("   mov r0,r0\n");
+        $addPadding = 0;
+    }
+}
+#If we had a code section, mark that this object doesn't need an executable
+# stack.
+if ($nxstack) {
+    printf ("    .section\t.note.GNU-stack,\"\",\%\%progbits\n");
+}
diff --git a/drivers/opus/celt/arm/arm_celt_map.c b/drivers/opus/celt/arm/arm_celt_map.c
new file mode 100644
index 0000000000..b187345154
--- /dev/null
+++ b/drivers/opus/celt/arm/arm_celt_map.c
@@ -0,0 +1,49 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "pitch.h"
+
+#if defined(OPUS_HAVE_RTCD)
+
+# if defined(OPUS_FIXED_POINT)
+opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+    const opus_val16 *, opus_val32 *, int , int) = {
+  celt_pitch_xcorr_c,               /* ARMv4 */
+  MAY_HAVE_EDSP(celt_pitch_xcorr),  /* EDSP */
+  MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
+  MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
+};
+# else
+#  error "Floating-point implementation is not supported by ARM asm yet." \
+ "Reconfigure with --disable-rtcd or send patches."
+# endif
+
+#endif
diff --git a/drivers/opus/celt/arm/armcpu.c b/drivers/opus/celt/arm/armcpu.c
new file mode 100644
index 0000000000..7f0af631b9
--- /dev/null
+++ b/drivers/opus/celt/arm/armcpu.c
@@ -0,0 +1,174 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#ifdef OPUS_HAVE_RTCD
+
+#include "armcpu.h"
+#include "cpu_support.h"
+#include "os_support.h"
+#include "opus_types.h"
+
+#define OPUS_CPU_ARM_V4    (1)
+#define OPUS_CPU_ARM_EDSP  (1<<1)
+#define OPUS_CPU_ARM_MEDIA (1<<2)
+#define OPUS_CPU_ARM_NEON  (1<<3)
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
+  opus_uint32 flags;
+  flags=0;
+  /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
+   * instructions via their assembled hex code.
+   * All of these instructions should be essentially nops. */
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+  __try{
+    /*PLD [r13]*/
+    __emit(0xF5DDF000);
+    flags|=OPUS_CPU_ARM_EDSP;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+  __try{
+    /*SHADD8 r3,r3,r3*/
+    __emit(0xE6333F93);
+    flags|=OPUS_CPU_ARM_MEDIA;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   if defined(OPUS_ARM_MAY_HAVE_NEON)
+  __try{
+    /*VORR q0,q0,q0*/
+    __emit(0xF2200150);
+    flags|=OPUS_CPU_ARM_NEON;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   endif
+#  endif
+# endif
+  return flags;
+}
+
+#elif defined(__linux__)
+/* Linux based */
+opus_uint32 opus_cpu_capabilities(void)
+{
+  opus_uint32 flags = 0;
+  FILE *cpuinfo;
+
+  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+   * Android */
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+
+  if(cpuinfo != NULL)
+  {
+    /* 512 should be enough for anybody (it's even enough for all the flags that
+     * x86 has accumulated... so far). */
+    char buf[512];
+
+    while(fgets(buf, 512, cpuinfo) != NULL)
+    {
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON)
+      /* Search for edsp and neon flag */
+      if(memcmp(buf, "Features", 8) == 0)
+      {
+        char *p;
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+        p = strstr(buf, " edsp");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_EDSP;
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+        p = strstr(buf, " neon");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_NEON;
+#  endif
+      }
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+      /* Search for media capabilities (>= ARMv6) */
+      if(memcmp(buf, "CPU architecture:", 17) == 0)
+      {
+        int version;
+        version = atoi(buf+17);
+
+        if(version >= 6)
+          flags |= OPUS_CPU_ARM_MEDIA;
+      }
+# endif
+    }
+
+    fclose(cpuinfo);
+  }
+  return flags;
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in priveleged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+   "your platform.  Reconfigure with --disable-rtcd (or send patches)."
+#endif
+
+int opus_select_arch(void)
+{
+  opus_uint32 flags = opus_cpu_capabilities();
+  int arch = 0;
+
+  if(!(flags & OPUS_CPU_ARM_EDSP))
+    return arch;
+  arch++;
+
+  if(!(flags & OPUS_CPU_ARM_MEDIA))
+    return arch;
+  arch++;
+
+  if(!(flags & OPUS_CPU_ARM_NEON))
+    return arch;
+  arch++;
+
+  return arch;
+}
+
+#endif
diff --git a/drivers/opus/celt/arm/armcpu.h b/drivers/opus/celt/arm/armcpu.h
new file mode 100644
index 0000000000..ac5744606e
--- /dev/null
+++ b/drivers/opus/celt/arm/armcpu.h
@@ -0,0 +1,71 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(ARMCPU_H)
+# define ARMCPU_H
+
+# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+#  define MAY_HAVE_EDSP(name) name ## _edsp
+# else
+#  define MAY_HAVE_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  define MAY_HAVE_MEDIA(name) name ## _media
+# else
+#  define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON)
+#  define MAY_HAVE_NEON(name) name ## _neon
+# else
+#  define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_EDSP)
+#  define PRESUME_EDSP(name) name ## _edsp
+# else
+#  define PRESUME_EDSP(name) name ## _c
+# endif
+
+# if defined(OPUS_ARM_PRESUME_MEDIA)
+#  define PRESUME_MEDIA(name) name ## _media
+# else
+#  define PRESUME_MEDIA(name) PRESUME_EDSP(name)
+# endif
+
+# if defined(OPUS_ARM_PRESUME_NEON)
+#  define PRESUME_NEON(name) name ## _neon
+# else
+#  define PRESUME_NEON(name) PRESUME_MEDIA(name)
+# endif
+
+# if defined(OPUS_HAVE_RTCD)
+int opus_select_arch(void);
+# endif
+
+#endif
diff --git a/drivers/opus/celt/arm/armopts.s b/drivers/opus/celt/arm/armopts.s
new file mode 100644
index 0000000000..fb9196072a
--- /dev/null
+++ b/drivers/opus/celt/arm/armopts.s
@@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; Set the following to 1 if we have EDSP instructions
+;  (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP  * 
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA * 
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON  * 
+
+END
diff --git a/drivers/opus/celt/arm/armopts.s.in b/drivers/opus/celt/arm/armopts.s.in
new file mode 100644
index 0000000000..3d8aaf2754
--- /dev/null
+++ b/drivers/opus/celt/arm/armopts.s.in
@@ -0,0 +1,37 @@
+/* Copyright (C) 2013 Mozilla Corporation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+; Set the following to 1 if we have EDSP instructions
+;  (LDRD/STRD, etc., ARMv5E and later).
+OPUS_ARM_MAY_HAVE_EDSP  * @OPUS_ARM_MAY_HAVE_EDSP@
+
+; Set the following to 1 if we have ARMv6 media instructions.
+OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@
+
+; Set the following to 1 if we have NEON (some ARMv7)
+OPUS_ARM_MAY_HAVE_NEON  * @OPUS_ARM_MAY_HAVE_NEON@
+
+END
diff --git a/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s
new file mode 100644
index 0000000000..09917b16bf
--- /dev/null
+++ b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s
@@ -0,0 +1,545 @@
+; Copyright (c) 2007-2008 CSIRO
+; Copyright (c) 2007-2009 Xiph.Org Foundation
+; Copyright (c) 2013      Parrot
+; Written by Aurélien Zanelli
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  AREA  |.text|, CODE, READONLY
+
+  GET    celt/arm/armopts.s
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+  EXPORT celt_pitch_xcorr_edsp
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+  EXPORT celt_pitch_xcorr_neon
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_NEON
+
+; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
+xcorr_kernel_neon PROC
+  ; input:
+  ;   r3     = int         len
+  ;   r4     = opus_val16 *x
+  ;   r5     = opus_val16 *y
+  ;   q0     = opus_val32  sum[4]
+  ; output:
+  ;   q0     = opus_val32  sum[4]
+  ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
+  ; internal usage:
+  ;   r12 = int j
+  ;   d3  = y_3|y_2|y_1|y_0
+  ;   q2  = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
+  ;   q3  = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
+  ;   q8  = scratch
+  ;
+  ; Load y[0...3]
+  ; This requires len>0 to always be valid (which we assert in the C code).
+  VLD1.16      {d5}, [r5]!
+  SUBS         r12, r3, #8
+  BLE xcorr_kernel_neon_process4
+; Process 8 samples at a time.
+; This loop loads one y value more than we actually need. Therefore we have to
+; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
+; reading past the end of the array.
+xcorr_kernel_neon_process8
+  ; This loop has 19 total instructions (10 cycles to issue, minimum), with
+  ; - 2 cycles of ARM insrtuctions,
+  ; - 10 cycles of load/store/byte permute instructions, and
+  ; - 9 cycles of data processing instructions.
+  ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
+  ; latter two categories, meaning the whole loop should run in 10 cycles per
+  ; iteration, barring cache misses.
+  ;
+  ; Load x[0...7]
+  VLD1.16      {d6, d7}, [r4]!
+  ; Unlike VMOV, VAND is a data processsing instruction (and doesn't get
+  ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
+  VAND         d3, d5, d5
+  SUBS         r12, r12, #8
+  ; Load y[4...11]
+  VLD1.16      {d4, d5}, [r5]!
+  VMLAL.S16    q0, d3, d6[0]
+  VEXT.16      d16, d3, d4, #1
+  VMLAL.S16    q0, d4, d7[0]
+  VEXT.16      d17, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d3, d4, #2
+  VMLAL.S16    q0, d17, d7[1]
+  VEXT.16      d17, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d3, d4, #3
+  VMLAL.S16    q0, d17, d7[2]
+  VEXT.16      d17, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+  VMLAL.S16    q0, d17, d7[3]
+  BGT xcorr_kernel_neon_process8
+; Process 4 samples here if we have > 4 left (still reading one extra y value).
+xcorr_kernel_neon_process4
+  ADDS         r12, r12, #4
+  BLE xcorr_kernel_neon_process2
+  ; Load x[0...3]
+  VLD1.16      d6, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #4
+  ; Load y[4...7]
+  VLD1.16      d5, [r5]!
+  VMLAL.S16    q0, d4, d6[0]
+  VEXT.16      d16, d4, d5, #1
+  VMLAL.S16    q0, d16, d6[1]
+  VEXT.16      d16, d4, d5, #2
+  VMLAL.S16    q0, d16, d6[2]
+  VEXT.16      d16, d4, d5, #3
+  VMLAL.S16    q0, d16, d6[3]
+; Process 2 samples here if we have > 2 left (still reading one extra y value).
+xcorr_kernel_neon_process2
+  ADDS         r12, r12, #2
+  BLE xcorr_kernel_neon_process1
+  ; Load x[0...1]
+  VLD2.16      {d6[],d7[]}, [r4]!
+  ; Use VAND since it's a data processing instruction again.
+  VAND         d4, d5, d5
+  SUB          r12, r12, #2
+  ; Load y[4...5]
+  VLD1.32      {d5[]}, [r5]!
+  VMLAL.S16    q0, d4, d6
+  VEXT.16      d16, d4, d5, #1
+  ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
+  ; instead of VEXT, since it's a data-processing instruction.
+  VSRI.64      d5, d4, #32
+  VMLAL.S16    q0, d16, d7
+; Process 1 sample using the extra y value we loaded above.
+xcorr_kernel_neon_process1
+  ; Load next *x
+  VLD1.16      {d6[]}, [r4]!
+  ADDS         r12, r12, #1
+  ; y[0...3] are left in d5 from prior iteration(s) (if any)
+  VMLAL.S16    q0, d5, d6
+  MOVLE        pc, lr
+; Now process 1 last sample, not reading ahead.
+  ; Load last *y
+  VLD1.16      {d4[]}, [r5]!
+  VSRI.64      d4, d5, #16
+  ; Load last *x
+  VLD1.16      {d6[]}, [r4]!
+  VMLAL.S16    q0, d4, d6
+  MOV          pc, lr
+  ENDP
+
+; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
+;  opus_val32 *xcorr, int len, int max_pitch)
+celt_pitch_xcorr_neon PROC
+  ; input:
+  ;   r0  = opus_val16 *_x
+  ;   r1  = opus_val16 *_y
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = int         maxcorr
+  ; internal usage:
+  ;   r4  = opus_val16 *x (for xcorr_kernel_neon())
+  ;   r5  = opus_val16 *y (for xcorr_kernel_neon())
+  ;   r6  = int         max_pitch
+  ;   r12 = int         j
+  ;   q15 = int         maxcorr[4] (q15 is not used by xcorr_kernel_neon())
+  STMFD        sp!, {r4-r6, lr}
+  LDR          r6, [sp, #16]
+  VMOV.S32     q15, #1
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  SUBS         r6, r6, #4
+  BLT celt_pitch_xcorr_neon_process4_done
+celt_pitch_xcorr_neon_process4
+  ; xcorr_kernel_neon parameters:
+  ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
+  MOV          r4, r0
+  MOV          r5, r1
+  VEOR         q0, q0, q0
+  ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
+  ; So we don't save/restore any other registers.
+  BL xcorr_kernel_neon
+  SUBS         r6, r6, #4
+  VST1.32      {q0}, [r2]!
+  ; _y += 4
+  ADD          r1, r1, #8
+  VMAX.S32     q15, q15, q0
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
+  BGE celt_pitch_xcorr_neon_process4
+; We have less than 4 sums left to compute.
+celt_pitch_xcorr_neon_process4_done
+  ADDS         r6, r6, #4
+  ; Reduce maxcorr to a single value
+  VMAX.S32     d30, d30, d31
+  VPMAX.S32    d30, d30, d30
+  ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
+  BLE celt_pitch_xcorr_neon_done
+; Now compute each remaining sum one at a time.
+celt_pitch_xcorr_neon_process_remaining
+  MOV          r4, r0
+  MOV          r5, r1
+  VMOV.I32     q0, #0
+  SUBS         r12, r3, #8
+  BLT celt_pitch_xcorr_neon_process_remaining4
+; Sum terms 8 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop8
+  ; Load x[0...7]
+  VLD1.16      {q1}, [r4]!
+  ; Load y[0...7]
+  VLD1.16      {q2}, [r5]!
+  SUBS         r12, r12, #8
+  VMLAL.S16    q0, d4, d2
+  VMLAL.S16    q0, d5, d3
+  BGE celt_pitch_xcorr_neon_process_remaining_loop8
+; Sum terms 4 at a time.
+celt_pitch_xcorr_neon_process_remaining4
+  ADDS         r12, r12, #4
+  BLT celt_pitch_xcorr_neon_process_remaining4_done
+  ; Load x[0...3]
+  VLD1.16      {d2}, [r4]!
+  ; Load y[0...3]
+  VLD1.16      {d3}, [r5]!
+  SUB          r12, r12, #4
+  VMLAL.S16    q0, d3, d2
+celt_pitch_xcorr_neon_process_remaining4_done
+  ; Reduce the sum to a single value.
+  VADD.S32     d0, d0, d1
+  VPADDL.S32   d0, d0
+  ADDS         r12, r12, #4
+  BLE celt_pitch_xcorr_neon_process_remaining_loop_done
+; Sum terms 1 at a time.
+celt_pitch_xcorr_neon_process_remaining_loop1
+  VLD1.16      {d2[]}, [r4]!
+  VLD1.16      {d3[]}, [r5]!
+  SUBS         r12, r12, #1
+  VMLAL.S16    q0, d2, d3
+  BGT celt_pitch_xcorr_neon_process_remaining_loop1
+celt_pitch_xcorr_neon_process_remaining_loop_done
+  VST1.32      {d0[0]}, [r2]!
+  VMAX.S32     d30, d30, d0
+  SUBS         r6, r6, #1
+  ; _y++
+  ADD          r1, r1, #2
+  ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
+  BGT celt_pitch_xcorr_neon_process_remaining
+celt_pitch_xcorr_neon_done
+  VMOV.32      r0, d30[0]
+  LDMFD        sp!, {r4-r6, pc}
+  ENDP
+
+ENDIF
+
+IF OPUS_ARM_MAY_HAVE_EDSP
+
+; This will get used on ARMv7 devices without NEON, so it has been optimized
+; to take advantage of dual-issuing where possible.
+xcorr_kernel_edsp PROC
+  ; input:
+  ;   r3      = int         len
+  ;   r4      = opus_val16 *_x (must be 32-bit aligned)
+  ;   r5      = opus_val16 *_y (must be 32-bit aligned)
+  ;   r6...r9 = opus_val32  sum[4]
+  ; output:
+  ;   r6...r9 = opus_val32  sum[4]
+  ; preserved: r0-r5
+  ; internal usage
+  ;   r2      = int         j
+  ;   r12,r14 = opus_val16  x[4]
+  ;   r10,r11 = opus_val16  y[4]
+  STMFD        sp!, {r2,r4,r5,lr}
+  LDR          r10, [r5], #4      ; Load y[0...1]
+  SUBS         r2, r3, #4         ; j = len-4
+  LDR          r11, [r5], #4      ; Load y[2...3]
+  BLE xcorr_kernel_edsp_process4_done
+  LDR          r12, [r4], #4      ; Load x[0...1]
+  ; Stall
+xcorr_kernel_edsp_process4
+  ; The multiplies must issue from pipeline 0, and can't dual-issue with each
+  ; other. Every other instruction here dual-issues with a multiply, and is
+  ; thus "free". There should be no stalls in the body of the loop.
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_0,y_0)
+  LDR          r14, [r4], #4      ; Load x[2...3]
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x_0,y_1)
+  SUBS         r2, r2, #4         ; j-=4
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_0,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x_0,y_3)
+  SMLATT       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x_1,y_1)
+  LDR          r10, [r5], #4      ; Load y[4...5]
+  SMLATB       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],x_1,y_2)
+  SMLATT       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x_1,y_3)
+  SMLATB       r9, r12, r10, r9   ; sum[3] = MAC16_16(sum[3],x_1,y_4)
+  LDRGT        r12, [r4], #4      ; Load x[0...1]
+  SMLABB       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_2,y_2)
+  SMLABT       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x_2,y_3)
+  SMLABB       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_2,y_4)
+  SMLABT       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x_2,y_5)
+  SMLATT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],x_3,y_3)
+  LDR          r11, [r5], #4      ; Load y[6...7]
+  SMLATB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],x_3,y_4)
+  SMLATT       r8, r14, r10, r8   ; sum[2] = MAC16_16(sum[2],x_3,y_5)
+  SMLATB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],x_3,y_6)
+  BGT xcorr_kernel_edsp_process4
+xcorr_kernel_edsp_process4_done
+  ADDS         r2, r2, #4
+  BLE xcorr_kernel_edsp_done
+  LDRH         r12, [r4], #2      ; r12 = *x++
+  SUBS         r2, r2, #1         ; j--
+  ; Stall
+  SMLABB       r6, r12, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_0)
+  LDRGTH       r14, [r4], #2      ; r14 = *x++
+  SMLABT       r7, r12, r10, r7   ; sum[1] = MAC16_16(sum[1],x,y_1)
+  SMLABB       r8, r12, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_2)
+  SMLABT       r9, r12, r11, r9   ; sum[3] = MAC16_16(sum[3],x,y_3)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r10, r6   ; sum[0] = MAC16_16(sum[0],x,y_1)
+  SUBS         r2, r2, #1         ; j--
+  SMLABB       r7, r14, r11, r7   ; sum[1] = MAC16_16(sum[1],x,y_2)
+  LDRH         r10, [r5], #2      ; r10 = y_4 = *y++
+  SMLABT       r8, r14, r11, r8   ; sum[2] = MAC16_16(sum[2],x,y_3)
+  LDRGTH       r12, [r4], #2      ; r12 = *x++
+  SMLABB       r9, r14, r10, r9   ; sum[3] = MAC16_16(sum[3],x,y_4)
+  BLE xcorr_kernel_edsp_done
+  SMLABB       r6, r12, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_2)
+  CMP          r2, #1             ; j--
+  SMLABT       r7, r12, r11, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_3)
+  LDRH         r2, [r5], #2       ; r2 = y_5 = *y++
+  SMLABB       r8, r12, r10, r8   ; sum[2] = MAC16_16(sum[2],tmp,y_4)
+  LDRGTH       r14, [r4]          ; r14 = *x
+  SMLABB       r9, r12, r2, r9    ; sum[3] = MAC16_16(sum[3],tmp,y_5)
+  BLE xcorr_kernel_edsp_done
+  SMLABT       r6, r14, r11, r6   ; sum[0] = MAC16_16(sum[0],tmp,y_3)
+  LDRH         r11, [r5]          ; r11 = y_6 = *y
+  SMLABB       r7, r14, r10, r7   ; sum[1] = MAC16_16(sum[1],tmp,y_4)
+  SMLABB       r8, r14, r2, r8    ; sum[2] = MAC16_16(sum[2],tmp,y_5)
+  SMLABB       r9, r14, r11, r9   ; sum[3] = MAC16_16(sum[3],tmp,y_6)
+xcorr_kernel_edsp_done
+  LDMFD        sp!, {r2,r4,r5,pc}
+  ENDP
+
+celt_pitch_xcorr_edsp PROC
+  ; input:
+  ;   r0  = opus_val16 *_x (must be 32-bit aligned)
+  ;   r1  = opus_val16 *_y (only needs to be 16-bit aligned)
+  ;   r2  = opus_val32 *xcorr
+  ;   r3  = int         len
+  ; output:
+  ;   r0  = maxcorr
+  ; internal usage
+  ;   r4  = opus_val16 *x
+  ;   r5  = opus_val16 *y
+  ;   r6  = opus_val32  sum0
+  ;   r7  = opus_val32  sum1
+  ;   r8  = opus_val32  sum2
+  ;   r9  = opus_val32  sum3
+  ;   r1  = int         max_pitch
+  ;   r12 = int         j
+  STMFD        sp!, {r4-r11, lr}
+  MOV          r5, r1
+  LDR          r1, [sp, #36]
+  MOV          r4, r0
+  TST          r5, #3
+  ; maxcorr = 1
+  MOV          r0, #1
+  BEQ          celt_pitch_xcorr_edsp_process1u_done
+; Compute one sum at the start to make y 32-bit aligned.
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  LDRH         r8, [r5], #2
+  BLE celt_pitch_xcorr_edsp_process1u_loop4_done
+  LDR          r6, [r4], #4
+  MOV          r8, r8, LSL #16
+celt_pitch_xcorr_edsp_process1u_loop4
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLATB       r14, r6, r9, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLABT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATB       r14, r7, r8, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGT        r6, [r4], #4
+  BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
+celt_pitch_xcorr_edsp_process1u_loop4_done
+  ADDS         r12, r12, #4
+celt_pitch_xcorr_edsp_process1u_loop1
+  LDRGEH       r6, [r4], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14    ; sum = MAC16_16(sum, *x, *y)
+  SUBGES       r12, r12, #1
+  LDRGTH       r8, [r5], #2
+  BGT celt_pitch_xcorr_edsp_process1u_loop1
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ADD          r5, r5, #2
+  MOVLT        r0, r14
+  SUBS         r1, r1, #1
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  BLE celt_pitch_xcorr_edsp_done
+celt_pitch_xcorr_edsp_process1u_done
+  ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
+  SUBS         r1, r1, #4
+  BLT celt_pitch_xcorr_edsp_process2
+celt_pitch_xcorr_edsp_process4
+  ; xcorr_kernel_edsp parameters:
+  ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
+  MOV          r6, #0
+  MOV          r7, #0
+  MOV          r8, #0
+  MOV          r9, #0
+  BL xcorr_kernel_edsp  ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
+  ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
+  CMP          r0, r6
+  ; _y+=4
+  ADD          r5, r5, #8
+  MOVLT        r0, r6
+  CMP          r0, r7
+  MOVLT        r0, r7
+  CMP          r0, r8
+  MOVLT        r0, r8
+  CMP          r0, r9
+  MOVLT        r0, r9
+  STMIA        r2!, {r6-r9}
+  SUBS         r1, r1, #4
+  BGE celt_pitch_xcorr_edsp_process4
+celt_pitch_xcorr_edsp_process2
+  ADDS         r1, r1, #2
+  BLT celt_pitch_xcorr_edsp_process1a
+  SUBS         r12, r3, #4
+  ; {r10, r11} = {sum0, sum1} = {0, 0}
+  MOV          r10, #0
+  MOV          r11, #0
+  LDR          r8, [r5], #4
+  BLE celt_pitch_xcorr_edsp_process2_loop_done
+  LDR          r6, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process2_loop4
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r7, [r4], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  LDR          r8, [r5], #4
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+  LDRGT        r6, [r4], #4
+  SMLABB       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_2, y_2)
+  SMLABT       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_2, y_3)
+  SMLATT       r10, r7, r9, r10     ; sum0 = MAC16_16(sum0, x_3, y_3)
+  LDRGT        r9, [r5], #4
+  SMLATB       r11, r7, r8, r11     ; sum1 = MAC16_16(sum1, x_3, y_4)
+  BGT celt_pitch_xcorr_edsp_process2_loop4
+celt_pitch_xcorr_edsp_process2_loop_done
+  ADDS         r12, r12, #2
+  BLE  celt_pitch_xcorr_edsp_process2_1
+  LDR          r6, [r4], #4
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDR          r9, [r5], #4
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  SUB          r12, r12, #2
+  SMLATT       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_1, y_1)
+  MOV          r8, r9
+  SMLATB       r11, r6, r9, r11     ; sum1 = MAC16_16(sum1, x_1, y_2)
+celt_pitch_xcorr_edsp_process2_1
+  LDRH         r6, [r4], #2
+  ADDS         r12, r12, #1
+  ; Stall
+  SMLABB       r10, r6, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_0)
+  LDRGTH       r7, [r4], #2
+  SMLABT       r11, r6, r8, r11     ; sum1 = MAC16_16(sum1, x_0, y_1)
+  BLE celt_pitch_xcorr_edsp_process2_done
+  LDRH         r9, [r5], #2
+  SMLABT       r10, r7, r8, r10     ; sum0 = MAC16_16(sum0, x_0, y_1)
+  SMLABB       r11, r7, r9, r11     ; sum1 = MAC16_16(sum1, x_0, y_2)
+celt_pitch_xcorr_edsp_process2_done
+  ; Restore _x
+  SUB          r4, r4, r3, LSL #1
+  ; Restore and advance _y
+  SUB          r5, r5, r3, LSL #1
+  ; maxcorr = max(maxcorr, sum0)
+  CMP          r0, r10
+  ADD          r5, r5, #2
+  MOVLT        r0, r10
+  SUB          r1, r1, #2
+  ; maxcorr = max(maxcorr, sum1)
+  CMP          r0, r11
+  ; xcorr[i] = sum
+  STR          r10, [r2], #4
+  MOVLT        r0, r11
+  STR          r11, [r2], #4
+celt_pitch_xcorr_edsp_process1a
+  ADDS         r1, r1, #1
+  BLT celt_pitch_xcorr_edsp_done
+  SUBS         r12, r3, #4
+  ; r14 = sum = 0
+  MOV          r14, #0
+  BLT celt_pitch_xcorr_edsp_process1a_loop_done
+  LDR          r6, [r4], #4
+  LDR          r8, [r5], #4
+  LDR          r7, [r4], #4
+  LDR          r9, [r5], #4
+celt_pitch_xcorr_edsp_process1a_loop4
+  SMLABB       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  LDRGE        r6, [r4], #4
+  SMLABB       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  LDRGE        r8, [r5], #4
+  SMLATT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_3, y_3)
+  LDRGE        r7, [r4], #4
+  LDRGE        r9, [r5], #4
+  BGE celt_pitch_xcorr_edsp_process1a_loop4
+celt_pitch_xcorr_edsp_process1a_loop_done
+  ADDS         r12, r12, #2
+  LDRGE        r6, [r4], #4
+  LDRGE        r8, [r5], #4
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  SUBGE        r12, r12, #2
+  SMLATTGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, x_1, y_1)
+  ADDS         r12, r12, #1
+  LDRGEH       r6, [r4], #2
+  LDRGEH       r8, [r5], #2
+  ; Stall
+  SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, *x, *y)
+  ; maxcorr = max(maxcorr, sum)
+  CMP          r0, r14
+  ; xcorr[i] = sum
+  STR          r14, [r2], #4
+  MOVLT        r0, r14
+celt_pitch_xcorr_edsp_done
+  LDMFD        sp!, {r4-r11, pc}
+  ENDP
+
+ENDIF
+
+END
diff --git a/drivers/opus/celt/arm/fixed_armv4.h b/drivers/opus/celt/arm/fixed_armv4.h
new file mode 100644
index 0000000000..b690bc8cea
--- /dev/null
+++ b/drivers/opus/celt/arm/fixed_armv4.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 2013 Xiph.Org Foundation and contributors */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv4_H
+#define FIXED_ARMv4_H
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b),"r"(a<<16)
+  );
+  return rd_hi;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b), "r"(a<<16)
+  );
+  /*We intentionally don't OR in the high bit of rd_lo for speed.*/
+  return rd_hi<<1;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#undef MULT32_32_Q31
+#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31)
+
+#endif
diff --git a/drivers/opus/celt/arm/fixed_armv5e.h b/drivers/opus/celt/arm/fixed_armv5e.h
new file mode 100644
index 0000000000..1194a7d3ec
--- /dev/null
+++ b/drivers/opus/celt/arm/fixed_armv5e.h
@@ -0,0 +1,116 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO
+   Copyright (C) 2013      Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+
+#include "fixed_armv4.h"
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b),"r"(a)
+  );
+  return res;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(a)
+  );
+  return res<<1;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q15\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(b<<1), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#undef MAC16_16
+static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_16\n\t"
+      "smlabb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b), "r"(c)
+  );
+  return res;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+
+/** 16x16 multiplication where the result fits in 32 bits */
+#undef MULT16_16
+static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_16\n\t"
+      "smulbb %0, %1, %2;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+
+#endif
diff --git a/drivers/opus/celt/arm/kiss_fft_armv4.h b/drivers/opus/celt/arm/kiss_fft_armv4.h
new file mode 100644
index 0000000000..773464628b
--- /dev/null
+++ b/drivers/opus/celt/arm/kiss_fft_armv4.h
@@ -0,0 +1,121 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv4_H
+#define KISS_FFT_ARMv4_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef OPUS_FIXED_POINT
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL4\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #17\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
+            "mov %[br], %[br], lsr #17\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #15\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MULC\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mr], r0, %[br]\n\t" \
+            "smlal %[tt], %[mr], r1, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mi], r1, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mi], r0, %[bi]\n\t" \
+            "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mi], %[br], %[mi], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#endif /* OPUS_FIXED_POINT */
+
+#endif /* KISS_FFT_ARMv4_H */
diff --git a/drivers/opus/celt/arm/kiss_fft_armv5e.h b/drivers/opus/celt/arm/kiss_fft_armv5e.h
new file mode 100644
index 0000000000..1eff56a66a
--- /dev/null
+++ b/drivers/opus/celt/arm/kiss_fft_armv5e.h
@@ -0,0 +1,118 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_ARMv5E_H
+#define KISS_FFT_ARMv5E_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef OPUS_FIXED_POINT
+
+#if defined(__thumb__)||defined(__thumb2__)
+#define LDRD_CONS "Q"
+#else
+#define LDRD_CONS "Uq"
+#endif
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHL32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL4\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHR32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHR32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+        int mr__; \
+        int mi1__; \
+        int mi2__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MULC\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mr], %[aval], %[bval]\n\t" \
+            "smulwb %[mi1], %H[aval], %[bval]\n\t" \
+            "smulwt %[mi2], %[aval], %[bval]\n\t" \
+            "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \
+            : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(mr__, 1); \
+        (m).i = SHL32(SUB32(mi1__, mi2__), 1); \
+    } \
+    while(0)
+
+#endif /* OPUS_FIXED_POINT */
+
+#endif /* KISS_FFT_GUTS_H */
diff --git a/drivers/opus/celt/arm/pitch_arm.h b/drivers/opus/celt/arm/pitch_arm.h
new file mode 100644
index 0000000000..df5e82ef0b
--- /dev/null
+++ b/drivers/opus/celt/arm/pitch_arm.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(PITCH_ARM_H)
+# define PITCH_ARM_H
+
+# include "armcpu.h"
+
+# if defined(OPUS_FIXED_POINT)
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON)
+opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#   define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
+opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
+    opus_val32 *xcorr, int len, int max_pitch);
+#  endif
+
+#  if !defined(OPUS_HAVE_RTCD)
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+#  endif
+
+# endif
+
+#endif
diff --git a/drivers/opus/celt/bands.c b/drivers/opus/celt/bands.c
new file mode 100644
index 0000000000..87280c8333
--- /dev/null
+++ b/drivers/opus/celt/bands.c
@@ -0,0 +1,1518 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2009 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <math.h>
+#include "bands.h"
+#include "opus_modes.h"
+#include "vq.h"
+#include "cwrs.h"
+#include "stack_alloc.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "rate.h"
+#include "quant_bands.h"
+#include "pitch.h"
+
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{
+   int i;
+   for (i=0;i<N;i++)
+   {
+      if (val < thresholds[i])
+         break;
+   }
+   if (i>prev && val < thresholds[prev]+hysteresis[prev])
+      i=prev;
+   if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
+      i=prev;
+   return i;
+}
+
+opus_uint32 celt_lcg_rand(opus_uint32 seed)
+{
+   return 1664525 * seed + 1013904223;
+}
+
+/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
+   with this approximation is important because it has an impact on the bit allocation */
+static opus_int16 bitexact_cos(opus_int16 x)
+{
+   opus_int32 tmp;
+   opus_int16 x2;
+   tmp = (4096+((opus_int32)(x)*(x)))>>13;
+   celt_assert(tmp<=32767);
+   x2 = tmp;
+   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
+   celt_assert(x2<=32766);
+   return 1+x2;
+}
+
+static int bitexact_log2tan(int isin,int icos)
+{
+   int lc;
+   int ls;
+   lc=EC_ILOG(icos);
+   ls=EC_ILOG(isin);
+   icos<<=15-lc;
+   isin<<=15-ls;
+   return (ls-lc)*(1<<11)
+         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
+         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
+}
+
+#ifdef OPUS_FIXED_POINT
+/* Compute the amplitude (sqrt energy) in each of the bands */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val32 maxval=0;
+         opus_val32 sum = 0;
+
+         j=M*eBands[i]; do {
+            maxval = MAX32(maxval, X[j+c*N]);
+            maxval = MAX32(maxval, -X[j+c*N]);
+         } while (++j<M*eBands[i+1]);
+
+         if (maxval > 0)
+         {
+            int shift = celt_ilog2(maxval)-10;
+            j=M*eBands[i]; do {
+               sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
+                                   EXTRACT16(VSHR32(X[j+c*N],shift)));
+            } while (++j<M*eBands[i+1]);
+            /* We're adding one here to ensure the normalized band isn't larger than unity norm */
+            bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
+         } else {
+            bandE[i+c*m->nbEBands] = EPSILON;
+         }
+         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
+      }
+   } while (++c<C);
+   /*printf ("\n");*/
+}
+
+/* Normalise each band such that the energy is one. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      i=0; do {
+         opus_val16 g;
+         int j,shift;
+         opus_val16 E;
+         shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
+         E = VSHR32(bandE[i+c*m->nbEBands], shift);
+         g = EXTRACT16(celt_rcp(SHL32(E,3)));
+         j=M*eBands[i]; do {
+            X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
+         } while (++j<M*eBands[i+1]);
+      } while (++i<end);
+   } while (++c<C);
+}
+
+#else /* OPUS_FIXED_POINT */
+/* Compute the amplitude (sqrt energy) in each of the bands */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val32 sum = 1e-27f;
+         for (j=M*eBands[i];j<M*eBands[i+1];j++)
+            sum += X[j+c*N]*X[j+c*N];
+         bandE[i+c*m->nbEBands] = celt_sqrt(sum);
+         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
+      }
+   } while (++c<C);
+   /*printf ("\n");*/
+}
+
+/* Normalise each band such that the energy is one. */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j;
+         opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
+         for (j=M*eBands[i];j<M*eBands[i+1];j++)
+            X[j+c*N] = freq[j+c*N]*g;
+      }
+   } while (++c<C);
+}
+
+#endif /* OPUS_FIXED_POINT */
+
+/* De-normalise the energy to produce the synthesis from the unit-energy bands */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
+{
+   int i, c, N;
+   const opus_int16 *eBands = m->eBands;
+   N = M*m->shortMdctSize;
+   celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
+   c=0; do {
+      celt_sig * OPUS_RESTRICT f;
+      const celt_norm * OPUS_RESTRICT x;
+      f = freq+c*N;
+      x = X+c*N+M*eBands[start];
+      for (i=0;i<M*eBands[start];i++)
+         *f++ = 0;
+      for (i=start;i<end;i++)
+      {
+         int j, band_end;
+         opus_val16 g;
+         opus_val16 lg;
+#ifdef OPUS_FIXED_POINT
+         int shift;
+#endif
+         j=M*eBands[i];
+         band_end = M*eBands[i+1];
+         lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+#ifndef OPUS_FIXED_POINT
+         g = celt_exp2(lg);
+#else
+         /* Handle the integer part of the log energy */
+         shift = 16-(lg>>DB_SHIFT);
+         if (shift>31)
+         {
+            shift=0;
+            g=0;
+         } else {
+            /* Handle the fractional part. */
+            g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+         }
+         /* Handle extreme gains with negative shift. */
+         if (shift<0)
+         {
+            /* For shift < -2 we'd be likely to overflow, so we're capping
+               the gain here. This shouldn't happen unless the bitstream is
+               already corrupted. */
+            if (shift < -2)
+            {
+               g = 32767;
+               shift = -2;
+            }
+            do {
+               *f++ = SHL32(MULT16_16(*x++, g), -shift);
+            } while (++j<band_end);
+         } else
+#endif
+         /* Be careful of the fixed-point "else" just above when changing this code */
+         do {
+            *f++ = SHR32(MULT16_16(*x++, g), shift);
+         } while (++j<band_end);
+      }
+      celt_assert(start <= end);
+      for (i=M*eBands[end];i<N;i++)
+         *f++ = 0;
+   } while (++c<C);
+}
+
+/* This prevents energy collapse for transients with multiple short MDCTs */
+void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
+      int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
+      opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
+{
+   int c, i, j, k;
+   for (i=start;i<end;i++)
+   {
+      int N0;
+      opus_val16 thresh, sqrt_1;
+      int depth;
+#ifdef OPUS_FIXED_POINT
+      int shift;
+      opus_val32 thresh32;
+#endif
+
+      N0 = m->eBands[i+1]-m->eBands[i];
+      /* depth in 1/8 bits */
+      depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
+
+#ifdef OPUS_FIXED_POINT
+      thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
+      thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
+      {
+         opus_val32 t;
+         t = N0<<LM;
+         shift = celt_ilog2(t)>>1;
+         t = SHL32(t, (7-shift)<<1);
+         sqrt_1 = celt_rsqrt_norm(t);
+      }
+#else
+      thresh = .5f*celt_exp2(-.125f*depth);
+      sqrt_1 = celt_rsqrt(N0<<LM);
+#endif
+
+      c=0; do
+      {
+         celt_norm *X;
+         opus_val16 prev1;
+         opus_val16 prev2;
+         opus_val32 Ediff;
+         opus_val16 r;
+         int renormalize=0;
+         prev1 = prev1logE[c*m->nbEBands+i];
+         prev2 = prev2logE[c*m->nbEBands+i];
+         if (C==1)
+         {
+            prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
+            prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
+         }
+         Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
+         Ediff = MAX32(0, Ediff);
+
+#ifdef OPUS_FIXED_POINT
+         if (Ediff < 16384)
+         {
+            opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
+            r = 2*MIN16(16383,r32);
+         } else {
+            r = 0;
+         }
+         if (LM==3)
+            r = MULT16_16_Q14(23170, MIN32(23169, r));
+         r = SHR16(MIN16(thresh, r),1);
+         r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
+#else
+         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
+            short blocks don't have the same energy as long */
+         r = 2.f*celt_exp2(-Ediff);
+         if (LM==3)
+            r *= 1.41421356f;
+         r = MIN16(thresh, r);
+         r = r*sqrt_1;
+#endif
+         X = X_+c*size+(m->eBands[i]<<LM);
+         for (k=0;k<1<<LM;k++)
+         {
+            /* Detect collapse */
+            if (!(collapse_masks[i*C+c]&1<<k))
+            {
+               /* Fill with noise */
+               for (j=0;j<N0;j++)
+               {
+                  seed = celt_lcg_rand(seed);
+                  X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
+               }
+               renormalize = 1;
+            }
+         }
+         /* We just added some energy, so we need to renormalise */
+         if (renormalize)
+            renormalise_vector(X, N0<<LM, Q15ONE);
+      } while (++c<C);
+   }
+}
+
+static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
+{
+   int i = bandID;
+   int j;
+   opus_val16 a1, a2;
+   opus_val16 left, right;
+   opus_val16 norm;
+#ifdef OPUS_FIXED_POINT
+   int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
+#endif
+   left = VSHR32(bandE[i],shift);
+   right = VSHR32(bandE[i+m->nbEBands],shift);
+   norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
+   a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
+   a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      l = X[j];
+      r = Y[j];
+      X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
+      /* Side is not encoded, no need to calculate */
+   }
+}
+
+static void stereo_split(celt_norm *X, celt_norm *Y, int N)
+{
+   int j;
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
+      r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
+      X[j] = l+r;
+      Y[j] = r-l;
+   }
+}
+
+static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
+{
+   int j;
+   opus_val32 xp=0, side=0;
+   opus_val32 El, Er;
+   opus_val16 mid2;
+#ifdef OPUS_FIXED_POINT
+   int kl, kr;
+#endif
+   opus_val32 t, lgain, rgain;
+
+   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
+   dual_inner_prod(Y, X, Y, N, &xp, &side);
+   /* Compensating for the mid normalization */
+   xp = MULT16_32_Q15(mid, xp);
+   /* mid and side are in Q15, not Q14 like X and Y */
+   mid2 = SHR32(mid, 1);
+   El = MULT16_16(mid2, mid2) + side - 2*xp;
+   Er = MULT16_16(mid2, mid2) + side + 2*xp;
+   if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
+   {
+      for (j=0;j<N;j++)
+         Y[j] = X[j];
+      return;
+   }
+
+#ifdef OPUS_FIXED_POINT
+   kl = celt_ilog2(El)>>1;
+   kr = celt_ilog2(Er)>>1;
+#endif
+   t = VSHR32(El, (kl-7)<<1);
+   lgain = celt_rsqrt_norm(t);
+   t = VSHR32(Er, (kr-7)<<1);
+   rgain = celt_rsqrt_norm(t);
+
+#ifdef OPUS_FIXED_POINT
+   if (kl < 7)
+      kl = 7;
+   if (kr < 7)
+      kr = 7;
+#endif
+
+   for (j=0;j<N;j++)
+   {
+      celt_norm r, l;
+      /* Apply mid scaling (side is already scaled) */
+      l = MULT16_16_Q15(mid, X[j]);
+      r = Y[j];
+      X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
+      Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
+   }
+}
+
+/* Decide whether we should spread the pulses in the current frame */
+int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+      int end, int C, int M)
+{
+   int i, c, N0;
+   int sum = 0, nbBands=0;
+   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+   int decision;
+   int hf_sum=0;
+
+   celt_assert(end>0);
+
+   N0 = M*m->shortMdctSize;
+
+   if (M*(eBands[end]-eBands[end-1]) <= 8)
+      return SPREAD_NONE;
+   c=0; do {
+      for (i=0;i<end;i++)
+      {
+         int j, N, tmp=0;
+         int tcount[3] = {0,0,0};
+         celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
+         N = M*(eBands[i+1]-eBands[i]);
+         if (N<=8)
+            continue;
+         /* Compute rough CDF of |x[j]| */
+         for (j=0;j<N;j++)
+         {
+            opus_val32 x2N; /* Q13 */
+
+            x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
+            if (x2N < QCONST16(0.25f,13))
+               tcount[0]++;
+            if (x2N < QCONST16(0.0625f,13))
+               tcount[1]++;
+            if (x2N < QCONST16(0.015625f,13))
+               tcount[2]++;
+         }
+
+         /* Only include four last bands (8 kHz and up) */
+         if (i>m->nbEBands-4)
+            hf_sum += 32*(tcount[1]+tcount[0])/N;
+         tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
+         sum += tmp*256;
+         nbBands++;
+      }
+   } while (++c<C);
+
+   if (update_hf)
+   {
+      if (hf_sum)
+         hf_sum /= C*(4-m->nbEBands+end);
+      *hf_average = (*hf_average+hf_sum)>>1;
+      hf_sum = *hf_average;
+      if (*tapset_decision==2)
+         hf_sum += 4;
+      else if (*tapset_decision==0)
+         hf_sum -= 4;
+      if (hf_sum > 22)
+         *tapset_decision=2;
+      else if (hf_sum > 18)
+         *tapset_decision=1;
+      else
+         *tapset_decision=0;
+   }
+   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
+   celt_assert(nbBands>0); /* end has to be non-zero */
+   sum /= nbBands;
+   /* Recursive averaging */
+   sum = (sum+*average)>>1;
+   *average = sum;
+   /* Hysteresis */
+   sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
+   if (sum < 80)
+   {
+      decision = SPREAD_AGGRESSIVE;
+   } else if (sum < 256)
+   {
+      decision = SPREAD_NORMAL;
+   } else if (sum < 384)
+   {
+      decision = SPREAD_LIGHT;
+   } else {
+      decision = SPREAD_NONE;
+   }
+#ifdef FUZZING
+   decision = rand()&0x3;
+   *tapset_decision=rand()%3;
+#endif
+   return decision;
+}
+
+/* Indexing table for converting from natural Hadamard to ordery Hadamard
+   This is essentially a bit-reversed Gray, on top of which we've added
+   an inversion of the order because we want the DC at the end rather than
+   the beginning. The lines are for N=2, 4, 8, 16 */
+static const int ordery_table[] = {
+       1,  0,
+       3,  0,  2,  1,
+       7,  0,  4,  3,  6,  1,  5,  2,
+      15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
+};
+
+static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{
+   int i,j;
+   VARDECL(celt_norm, tmp);
+   int N;
+   SAVE_STACK;
+   N = N0*stride;
+   ALLOC(tmp, N, celt_norm);
+   celt_assert(stride>0);
+   if (hadamard)
+   {
+      const int *ordery = ordery_table+stride-2;
+      for (i=0;i<stride;i++)
+      {
+         for (j=0;j<N0;j++)
+            tmp[ordery[i]*N0+j] = X[j*stride+i];
+      }
+   } else {
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[i*N0+j] = X[j*stride+i];
+   }
+   for (j=0;j<N;j++)
+      X[j] = tmp[j];
+   RESTORE_STACK;
+}
+
+static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
+{
+   int i,j;
+   VARDECL(celt_norm, tmp);
+   int N;
+   SAVE_STACK;
+   N = N0*stride;
+   ALLOC(tmp, N, celt_norm);
+   if (hadamard)
+   {
+      const int *ordery = ordery_table+stride-2;
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[j*stride+i] = X[ordery[i]*N0+j];
+   } else {
+      for (i=0;i<stride;i++)
+         for (j=0;j<N0;j++)
+            tmp[j*stride+i] = X[i*N0+j];
+   }
+   for (j=0;j<N;j++)
+      X[j] = tmp[j];
+   RESTORE_STACK;
+}
+
+void haar1(celt_norm *X, int N0, int stride)
+{
+   int i, j;
+   N0 >>= 1;
+   for (i=0;i<stride;i++)
+      for (j=0;j<N0;j++)
+      {
+         celt_norm tmp1, tmp2;
+         tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
+         tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
+         X[stride*2*j+i] = tmp1 + tmp2;
+         X[stride*(2*j+1)+i] = tmp1 - tmp2;
+      }
+}
+
+static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
+{
+   static const opus_int16 exp2_table8[8] =
+      {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
+   int qn, qb;
+   int N2 = 2*N-1;
+   if (stereo && N==2)
+      N2--;
+   /* The upper limit ensures that in a stereo split with itheta==16384, we'll
+       always have enough bits left over to code at least one pulse in the
+       side; otherwise it would collapse, since it doesn't get folded. */
+   qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
+
+   qb = IMIN(8<<BITRES, qb);
+
+   if (qb<(1<<BITRES>>1)) {
+      qn = 1;
+   } else {
+      qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
+      qn = (qn+1)>>1<<1;
+   }
+   celt_assert(qn <= 256);
+   return qn;
+}
+
+struct band_ctx {
+   int encode;
+   const CELTMode *m;
+   int i;
+   int intensity;
+   int spread;
+   int tf_change;
+   ec_ctx *ec;
+   opus_int32 remaining_bits;
+   const celt_ener *bandE;
+   opus_uint32 seed;
+};
+
+struct split_ctx {
+   int inv;
+   int imid;
+   int iside;
+   int delta;
+   int itheta;
+   int qalloc;
+};
+
+static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
+      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
+      int LM,
+      int stereo, int *fill)
+{
+   int qn;
+   int itheta=0;
+   int delta;
+   int imid, iside;
+   int qalloc;
+   int pulse_cap;
+   int offset;
+   opus_int32 tell;
+   int inv=0;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int intensity;
+   ec_ctx *ec;
+   const celt_ener *bandE;
+
+   encode = ctx->encode;
+   m = ctx->m;
+   i = ctx->i;
+   intensity = ctx->intensity;
+   ec = ctx->ec;
+   bandE = ctx->bandE;
+
+   /* Decide on the resolution to give to the split parameter theta */
+   pulse_cap = m->logN[i]+LM*(1<<BITRES);
+   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
+   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
+   if (stereo && i>=intensity)
+      qn = 1;
+   if (encode)
+   {
+      /* theta is the atan() of the ratio between the (normalized)
+         side and mid. With just that parameter, we can re-scale both
+         mid and side because we know that 1) they have unit norm and
+         2) they are orthogonal. */
+      itheta = stereo_itheta(X, Y, stereo, N);
+   }
+   tell = ec_tell_frac(ec);
+   if (qn!=1)
+   {
+      if (encode)
+         itheta = (itheta*qn+8192)>>14;
+
+      /* Entropy coding of the angle. We use a uniform pdf for the
+         time split, a step for stereo, and a triangular one for the rest. */
+      if (stereo && N>2)
+      {
+         int p0 = 3;
+         int x = itheta;
+         int x0 = qn/2;
+         int ft = p0*(x0+1) + x0;
+         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
+         if (encode)
+         {
+            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+         } else {
+            int fs;
+            fs=ec_decode(ec,ft);
+            if (fs<(x0+1)*p0)
+               x=fs/p0;
+            else
+               x=x0+1+(fs-(x0+1)*p0);
+            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+            itheta = x;
+         }
+      } else if (B0>1 || stereo) {
+         /* Uniform pdf */
+         if (encode)
+            ec_enc_uint(ec, itheta, qn+1);
+         else
+            itheta = ec_dec_uint(ec, qn+1);
+      } else {
+         int fs=1, ft;
+         ft = ((qn>>1)+1)*((qn>>1)+1);
+         if (encode)
+         {
+            int fl;
+
+            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
+            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+
+            ec_encode(ec, fl, fl+fs, ft);
+         } else {
+            /* Triangular pdf */
+            int fl=0;
+            int fm;
+            fm = ec_decode(ec, ft);
+
+            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
+            {
+               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
+               fs = itheta + 1;
+               fl = itheta*(itheta + 1)>>1;
+            }
+            else
+            {
+               itheta = (2*(qn + 1)
+                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
+               fs = qn + 1 - itheta;
+               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+            }
+
+            ec_dec_update(ec, fl, fl+fs, ft);
+         }
+      }
+      itheta = (opus_int32)itheta*16384/qn;
+      if (encode && stereo)
+      {
+         if (itheta==0)
+            intensity_stereo(m, X, Y, bandE, i, N);
+         else
+            stereo_split(X, Y, N);
+      }
+      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
+               Let's do that at higher complexity */
+   } else if (stereo) {
+      if (encode)
+      {
+         inv = itheta > 8192;
+         if (inv)
+         {
+            int j;
+            for (j=0;j<N;j++)
+               Y[j] = -Y[j];
+         }
+         intensity_stereo(m, X, Y, bandE, i, N);
+      }
+      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
+      {
+         if (encode)
+            ec_enc_bit_logp(ec, inv, 2);
+         else
+            inv = ec_dec_bit_logp(ec, 2);
+      } else
+         inv = 0;
+      itheta = 0;
+   }
+   qalloc = ec_tell_frac(ec) - tell;
+   *b -= qalloc;
+
+   if (itheta == 0)
+   {
+      imid = 32767;
+      iside = 0;
+      *fill &= (1<<B)-1;
+      delta = -16384;
+   } else if (itheta == 16384)
+   {
+      imid = 0;
+      iside = 32767;
+      *fill &= ((1<<B)-1)<<B;
+      delta = 16384;
+   } else {
+      imid = bitexact_cos((opus_int16)itheta);
+      iside = bitexact_cos((opus_int16)(16384-itheta));
+      /* This is the mid vs side allocation that minimizes squared error
+         in that band. */
+      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
+   }
+
+   sctx->inv = inv;
+   sctx->imid = imid;
+   sctx->iside = iside;
+   sctx->delta = delta;
+   sctx->itheta = itheta;
+   sctx->qalloc = qalloc;
+}
+static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
+      celt_norm *lowband_out)
+{
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int c;
+   int stereo;
+   celt_norm *x = X;
+   int encode;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   ec = ctx->ec;
+
+   stereo = Y != NULL;
+   c=0; do {
+      int sign=0;
+      if (ctx->remaining_bits>=1<<BITRES)
+      {
+         if (encode)
+         {
+            sign = x[0]<0;
+            ec_enc_bits(ec, sign, 1);
+         } else {
+            sign = ec_dec_bits(ec, 1);
+         }
+         ctx->remaining_bits -= 1<<BITRES;
+         b-=1<<BITRES;
+      }
+      if (resynth)
+         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
+      x = Y;
+   } while (++c<1+stereo);
+   if (lowband_out)
+      lowband_out[0] = SHR16(X[0],4);
+   return 1;
+}
+
+/* This function is responsible for encoding and decoding a mono partition.
+   It can split the band in two and transmit the energy difference with
+   the two half-bands. It can be called recursively so bands can end up being
+   split in 8 parts. */
+static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM,
+      opus_val16 gain, int fill)
+{
+   const unsigned char *cache;
+   int q;
+   int curr_bits;
+   int imid=0, iside=0;
+   int B0=B;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   celt_norm *Y=NULL;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int spread;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   m = ctx->m;
+   i = ctx->i;
+   spread = ctx->spread;
+   ec = ctx->ec;
+
+   /* If we need 1.5 more bit than we can produce, split the band in two. */
+   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
+   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
+   {
+      int mbits, sbits, delta;
+      int itheta;
+      int qalloc;
+      struct split_ctx sctx;
+      celt_norm *next_lowband2=NULL;
+      opus_int32 rebalance;
+
+      N >>= 1;
+      Y = X+N;
+      LM -= 1;
+      if (B==1)
+         fill = (fill&1)|(fill<<1);
+      B = (B+1)>>1;
+
+      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
+            LM, 0, &fill);
+      imid = sctx.imid;
+      iside = sctx.iside;
+      delta = sctx.delta;
+      itheta = sctx.itheta;
+      qalloc = sctx.qalloc;
+#ifdef OPUS_FIXED_POINT
+      mid = imid;
+      side = iside;
+#else
+      mid = (1.f/32768)*imid;
+      side = (1.f/32768)*iside;
+#endif
+
+      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
+      if (B0>1 && (itheta&0x3fff))
+      {
+         if (itheta > 8192)
+            /* Rough approximation for pre-echo masking */
+            delta -= delta>>(4-LM);
+         else
+            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
+            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
+      }
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+
+      if (lowband)
+         next_lowband2 = lowband+N; /* >32-bit split case */
+
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         cm = quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+      } else {
+         cm = quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+      }
+   } else {
+      /* This is the basic no-split case */
+      q = bits2pulses(m, i, LM, b);
+      curr_bits = pulses2bits(m, i, LM, q);
+      ctx->remaining_bits -= curr_bits;
+
+      /* Ensures we can never bust the budget */
+      while (ctx->remaining_bits < 0 && q > 0)
+      {
+         ctx->remaining_bits += curr_bits;
+         q--;
+         curr_bits = pulses2bits(m, i, LM, q);
+         ctx->remaining_bits -= curr_bits;
+      }
+
+      if (q!=0)
+      {
+         int K = get_pulses(q);
+
+         /* Finally do the actual quantization */
+         if (encode)
+         {
+            cm = alg_quant(X, N, K, spread, B, ec
+#ifdef RESYNTH
+                 , gain
+#endif
+                 );
+         } else {
+            cm = alg_unquant(X, N, K, spread, B, ec, gain);
+         }
+      } else {
+         /* If there's no pulse, fill the band anyway */
+         int j;
+         if (resynth)
+         {
+            unsigned cm_mask;
+            /* B can be as large as 16, so this shift might overflow an int on a
+               16-bit platform; use a long to get defined behavior.*/
+            cm_mask = (unsigned)(1UL<<B)-1;
+            fill &= cm_mask;
+            if (!fill)
+            {
+               for (j=0;j<N;j++)
+                  X[j] = 0;
+            } else {
+               if (lowband == NULL)
+               {
+                  /* Noise */
+                  for (j=0;j<N;j++)
+                  {
+                     ctx->seed = celt_lcg_rand(ctx->seed);
+                     X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
+                  }
+                  cm = cm_mask;
+               } else {
+                  /* Folded spectrum */
+                  for (j=0;j<N;j++)
+                  {
+                     opus_val16 tmp;
+                     ctx->seed = celt_lcg_rand(ctx->seed);
+                     /* About 48 dB below the "normal" folding level */
+                     tmp = QCONST16(1.0f/256, 10);
+                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
+                     X[j] = lowband[j]+tmp;
+                  }
+                  cm = fill;
+               }
+               renormalise_vector(X, N, gain);
+            }
+         }
+      }
+   }
+
+   return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the mono case. */
+static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      opus_val16 gain, celt_norm *lowband_scratch, int fill)
+{
+   int N0=N;
+   int N_B=N;
+   int N_B0;
+   int B0=B;
+   int time_divide=0;
+   int recombine=0;
+   int longBlocks;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int k;
+   int encode;
+   int tf_change;
+
+   encode = ctx->encode;
+   tf_change = ctx->tf_change;
+
+   longBlocks = B0==1;
+
+   N_B /= B;
+
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, NULL, b, lowband_out);
+   }
+
+   if (tf_change>0)
+      recombine = tf_change;
+   /* Band recombining to increase frequency resolution */
+
+   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
+   {
+      int j;
+      for (j=0;j<N;j++)
+         lowband_scratch[j] = lowband[j];
+      lowband = lowband_scratch;
+   }
+
+   for (k=0;k<recombine;k++)
+   {
+      static const unsigned char bit_interleave_table[16]={
+            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+      };
+      if (encode)
+         haar1(X, N>>k, 1<<k);
+      if (lowband)
+         haar1(lowband, N>>k, 1<<k);
+      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
+   }
+   B>>=recombine;
+   N_B<<=recombine;
+
+   /* Increasing the time resolution */
+   while ((N_B&1) == 0 && tf_change<0)
+   {
+      if (encode)
+         haar1(X, N_B, B);
+      if (lowband)
+         haar1(lowband, N_B, B);
+      fill |= fill<<B;
+      B <<= 1;
+      N_B >>= 1;
+      time_divide++;
+      tf_change++;
+   }
+   B0=B;
+   N_B0 = N_B;
+
+   /* Reorganize the samples in time order instead of frequency order */
+   if (B0>1)
+   {
+      if (encode)
+         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      if (lowband)
+         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+   }
+
+   cm = quant_partition(ctx, X, N, b, B, lowband,
+         LM, gain, fill);
+
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (resynth)
+   {
+      /* Undo the sample reorganization going from time order to frequency order */
+      if (B0>1)
+         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+
+      /* Undo time-freq changes that we did earlier */
+      N_B = N_B0;
+      B = B0;
+      for (k=0;k<time_divide;k++)
+      {
+         B >>= 1;
+         N_B <<= 1;
+         cm |= cm>>B;
+         haar1(X, N_B, B);
+      }
+
+      for (k=0;k<recombine;k++)
+      {
+         static const unsigned char bit_deinterleave_table[16]={
+               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+         };
+         cm = bit_deinterleave_table[cm];
+         haar1(X, N0>>k, 1<<k);
+      }
+      B<<=recombine;
+
+      /* Scale output for later folding */
+      if (lowband_out)
+      {
+         int j;
+         opus_val16 n;
+         n = celt_sqrt(SHL32(EXTEND32(N0),22));
+         for (j=0;j<N0;j++)
+            lowband_out[j] = MULT16_16_Q15(n,X[j]);
+      }
+      cm &= (1<<B)-1;
+   }
+   return cm;
+}
+
+
+/* This function is responsible for encoding and decoding a band for the stereo case. */
+static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      celt_norm *lowband_scratch, int fill)
+{
+   int imid=0, iside=0;
+   int inv = 0;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int mbits, sbits, delta;
+   int itheta;
+   int qalloc;
+   struct split_ctx sctx;
+   int orig_fill;
+   int encode;
+   ec_ctx *ec;
+
+   encode = ctx->encode;
+   ec = ctx->ec;
+
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, Y, b, lowband_out);
+   }
+
+   orig_fill = fill;
+
+   compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
+         LM, 1, &fill);
+   inv = sctx.inv;
+   imid = sctx.imid;
+   iside = sctx.iside;
+   delta = sctx.delta;
+   itheta = sctx.itheta;
+   qalloc = sctx.qalloc;
+#ifdef OPUS_FIXED_POINT
+   mid = imid;
+   side = iside;
+#else
+   mid = (1.f/32768)*imid;
+   side = (1.f/32768)*iside;
+#endif
+
+   /* This is a special case for N=2 that only works for stereo and takes
+      advantage of the fact that mid and side are orthogonal to encode
+      the side with just one bit. */
+   if (N==2)
+   {
+      int c;
+      int sign=0;
+      celt_norm *x2, *y2;
+      mbits = b;
+      sbits = 0;
+      /* Only need one bit for the side. */
+      if (itheta != 0 && itheta != 16384)
+         sbits = 1<<BITRES;
+      mbits -= sbits;
+      c = itheta > 8192;
+      ctx->remaining_bits -= qalloc+sbits;
+
+      x2 = c ? Y : X;
+      y2 = c ? X : Y;
+      if (sbits)
+      {
+         if (encode)
+         {
+            /* Here we only need to encode a sign for the side. */
+            sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
+            ec_enc_bits(ec, sign, 1);
+         } else {
+            sign = ec_dec_bits(ec, 1);
+         }
+      }
+      sign = 1-2*sign;
+      /* We use orig_fill here because we want to fold the side, but if
+         itheta==16384, we'll have cleared the low bits of fill. */
+      cm = quant_band(ctx, x2, N, mbits, B, lowband,
+            LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
+      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
+         and there's no need to worry about mixing with the other channel. */
+      y2[0] = -sign*x2[1];
+      y2[1] = sign*x2[0];
+      if (resynth)
+      {
+         celt_norm tmp;
+         X[0] = MULT16_16_Q15(mid, X[0]);
+         X[1] = MULT16_16_Q15(mid, X[1]);
+         Y[0] = MULT16_16_Q15(side, Y[0]);
+         Y[1] = MULT16_16_Q15(side, Y[1]);
+         tmp = X[0];
+         X[0] = SUB16(tmp,Y[0]);
+         Y[0] = ADD16(tmp,Y[0]);
+         tmp = X[1];
+         X[1] = SUB16(tmp,Y[1]);
+         Y[1] = ADD16(tmp,Y[1]);
+      }
+   } else {
+      /* "Normal" split code */
+      opus_int32 rebalance;
+
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm = quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm |= quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+      } else {
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm = quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm |= quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+      }
+   }
+
+
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (resynth)
+   {
+      if (N!=2)
+         stereo_merge(X, Y, mid, N);
+      if (inv)
+      {
+         int j;
+         for (j=0;j<N;j++)
+            Y[j] = -Y[j];
+      }
+   }
+   return cm;
+}
+
+
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
+      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
+      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed)
+{
+   int i;
+   opus_int32 remaining_bits;
+   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
+   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
+   VARDECL(celt_norm, _norm);
+   celt_norm *lowband_scratch;
+   int B;
+   int M;
+   int lowband_offset;
+   int update_lowband = 1;
+   int C = Y_ != NULL ? 2 : 1;
+   int norm_offset;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !encode;
+#endif
+   struct band_ctx ctx;
+   SAVE_STACK;
+
+   M = 1<<LM;
+   B = shortBlocks ? M : 1;
+   norm_offset = M*eBands[start];
+   /* No need to allocate norm for the last band because we don't need an
+      output in that band. */
+   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
+   norm = _norm;
+   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
+   /* We can use the last band as scratch space because we don't need that
+      scratch space for the last band. */
+   lowband_scratch = X_+M*eBands[m->nbEBands-1];
+
+   lowband_offset = 0;
+   ctx.bandE = bandE;
+   ctx.ec = ec;
+   ctx.encode = encode;
+   ctx.intensity = intensity;
+   ctx.m = m;
+   ctx.seed = *seed;
+   ctx.spread = spread;
+   for (i=start;i<end;i++)
+   {
+      opus_int32 tell;
+      int b;
+      int N;
+      opus_int32 curr_balance;
+      int effective_lowband=-1;
+      celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
+      int tf_change=0;
+      unsigned x_cm;
+      unsigned y_cm;
+      int last;
+
+      ctx.i = i;
+      last = (i==end-1);
+
+      X = X_+M*eBands[i];
+      if (Y_!=NULL)
+         Y = Y_+M*eBands[i];
+      else
+         Y = NULL;
+      N = M*eBands[i+1]-M*eBands[i];
+      tell = ec_tell_frac(ec);
+
+      /* Compute how many bits we want to allocate to this band */
+      if (i != start)
+         balance -= tell;
+      remaining_bits = total_bits-tell-1;
+      ctx.remaining_bits = remaining_bits;
+      if (i <= codedBands-1)
+      {
+         curr_balance = balance / IMIN(3, codedBands-i);
+         b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
+      } else {
+         b = 0;
+      }
+
+      if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
+            lowband_offset = i;
+
+      tf_change = tf_res[i];
+      ctx.tf_change = tf_change;
+      if (i>=m->effEBands)
+      {
+         X=norm;
+         if (Y_!=NULL)
+            Y = norm;
+         lowband_scratch = NULL;
+      }
+      if (i==end-1)
+         lowband_scratch = NULL;
+
+      /* Get a conservative estimate of the collapse_mask's for the bands we're
+         going to be folding from. */
+      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
+      {
+         int fold_start;
+         int fold_end;
+         int fold_i;
+         /* This ensures we never repeat spectral content within one band */
+         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
+         fold_start = lowband_offset;
+         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
+         fold_end = lowband_offset-1;
+         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
+         x_cm = y_cm = 0;
+         fold_i = fold_start; do {
+           x_cm |= collapse_masks[fold_i*C+0];
+           y_cm |= collapse_masks[fold_i*C+C-1];
+         } while (++fold_i<fold_end);
+      }
+      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
+         always) be non-zero. */
+      else
+         x_cm = y_cm = (1<<B)-1;
+
+      if (dual_stereo && i==intensity)
+      {
+         int j;
+
+         /* Switch off dual stereo to do intensity. */
+         dual_stereo = 0;
+         if (resynth)
+            for (j=0;j<M*eBands[i]-norm_offset;j++)
+               norm[j] = HALF32(norm[j]+norm2[j]);
+      }
+      if (dual_stereo)
+      {
+         x_cm = quant_band(&ctx, X, N, b/2, B,
+               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+               last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
+         y_cm = quant_band(&ctx, Y, N, b/2, B,
+               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
+               last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
+      } else {
+         if (Y!=NULL)
+         {
+            x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
+         } else {
+            x_cm = quant_band(&ctx, X, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
+         }
+         y_cm = x_cm;
+      }
+      collapse_masks[i*C+0] = (unsigned char)x_cm;
+      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
+      balance += pulses[i] + tell;
+
+      /* Update the folding position only as long as we have 1 bit/sample depth. */
+      update_lowband = b>(N<<BITRES);
+   }
+   *seed = ctx.seed;
+
+   RESTORE_STACK;
+}
+
diff --git a/drivers/opus/celt/bands.h b/drivers/opus/celt/bands.h
new file mode 100644
index 0000000000..fe1e47097a
--- /dev/null
+++ b/drivers/opus/celt/bands.h
@@ -0,0 +1,114 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2009 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef BANDS_H
+#define BANDS_H
+
+#include "arch.h"
+#include "opus_modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "rate.h"
+
+/** Compute the amplitude (sqrt energy) in each of the bands
+ * @param m Mode data
+ * @param X Spectrum
+ * @param bandE Square root of the energy for each band (returned)
+ */
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
+
+/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
+
+/** Normalise each band of X such that the energy in each band is
+    equal to 1
+ * @param m Mode data
+ * @param X Spectrum (returned normalised)
+ * @param bandE Square root of the energy for each band
+ */
+void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M);
+
+/** Denormalise each band of X to restore full amplitude
+ * @param m Mode data
+ * @param X Spectrum (returned de-normalised)
+ * @param bandE Square root of the energy for each band
+ */
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
+
+#define SPREAD_NONE       (0)
+#define SPREAD_LIGHT      (1)
+#define SPREAD_NORMAL     (2)
+#define SPREAD_AGGRESSIVE (3)
+
+int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
+      int end, int C, int M);
+
+#ifdef MEASURE_NORM_MSE
+void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C);
+#endif
+
+void haar1(celt_norm *X, int N0, int stride);
+
+/** Quantisation/encoding of the residual spectrum
+ * @param encode flag that indicates whether we're encoding (1) or decoding (0)
+ * @param m Mode data
+ * @param start First band to process
+ * @param end Last band to process + 1
+ * @param X Residual (normalised)
+ * @param Y Residual (normalised) for second channel (or NULL for mono)
+ * @param collapse_masks Anti-collapse tracking mask
+ * @param bandE Square root of the energy for each band
+ * @param pulses Bit allocation (per band) for PVQ
+ * @param shortBlocks Zero for long blocks, non-zero for short blocks
+ * @param spread Amount of spreading to use
+ * @param dual_stereo Zero for MS stereo, non-zero for dual stereo
+ * @param intensity First band to use intensity stereo
+ * @param tf_res Time-frequency resolution change
+ * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
+ * @param balance Number of unallocated bits
+ * @param en Entropy coder state
+ * @param LM log2() of the number of 2.5 subframes in the frame
+ * @param codedBands Last band to receive bits + 1
+ * @param seed Random generator seed
+ */
+void quant_all_bands(int encode, const CELTMode *m, int start, int end,
+      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
+      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
+      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
+
+void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
+      int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
+      opus_val16 *prev2logE, int *pulses, opus_uint32 seed);
+
+opus_uint32 celt_lcg_rand(opus_uint32 seed);
+
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
+
+#endif /* BANDS_H */
diff --git a/drivers/opus/celt/celt.c b/drivers/opus/celt/celt.c
new file mode 100644
index 0000000000..b894e1e13f
--- /dev/null
+++ b/drivers/opus/celt/celt.c
@@ -0,0 +1,223 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define CELT_C
+
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "opus_modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+#ifndef PACKAGE_VERSION
+#define PACKAGE_VERSION "unknown"
+#endif
+
+
+int resampling_factor(opus_int32 rate)
+{
+   int ret;
+   switch (rate)
+   {
+   case 48000:
+      ret = 1;
+      break;
+   case 24000:
+      ret = 2;
+      break;
+   case 16000:
+      ret = 3;
+      break;
+   case 12000:
+      ret = 4;
+      break;
+   case 8000:
+      ret = 6;
+      break;
+   default:
+#ifndef CUSTOM_MODES
+      celt_assert(0);
+#endif
+      ret = 0;
+      break;
+   }
+   return ret;
+}
+
+#ifndef OVERRIDE_COMB_FILTER_CONST
+static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   opus_val32 x0, x1, x2, x3, x4;
+   int i;
+   x4 = x[-T-2];
+   x3 = x[-T-1];
+   x2 = x[-T];
+   x1 = x[-T+1];
+   for (i=0;i<N;i++)
+   {
+      x0=x[i-T+2];
+      y[i] = x[i]
+               + MULT16_32_Q15(g10,x2)
+               + MULT16_32_Q15(g11,ADD32(x1,x3))
+               + MULT16_32_Q15(g12,ADD32(x0,x4));
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+
+}
+#endif
+
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap)
+{
+   int i;
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
+   opus_val16 g00, g01, g02, g10, g11, g12;
+   opus_val32 x0, x1, x2, x3, x4;
+   static const opus_val16 gains[3][3] = {
+         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+   g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
+   x1 = x[-T1+1];
+   x2 = x[-T1  ];
+   x3 = x[-T1-1];
+   x4 = x[-T1-2];
+   for (i=0;i<overlap;i++)
+   {
+      opus_val16 f;
+      x0=x[i-T1+2];
+      f = MULT16_16_Q15(window[i],window[i]);
+      y[i] = x[i]
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+
+   }
+   if (g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+      return;
+   }
+
+   /* Compute the part with the constant filter. */
+   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
+}
+
+const signed char tf_select_table[4][8] = {
+      {0, -1, 0, -1,    0,-1, 0,-1},
+      {0, -1, 0, -2,    1, 0, 1,-1},
+      {0, -2, 0, -3,    2, 0, 1,-1},
+      {0, -2, 0, -3,    3, 0, 1,-1},
+};
+
+
+void init_caps(const CELTMode *m,int *cap,int LM,int C)
+{
+   int i;
+   for (i=0;i<m->nbEBands;i++)
+   {
+      int N;
+      N=(m->eBands[i+1]-m->eBands[i])<<LM;
+      cap[i] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2;
+   }
+}
+
+
+
+const char *opus_strerror(int error)
+{
+   static const char * const error_strings[8] = {
+      "success",
+      "invalid argument",
+      "buffer too small",
+      "internal error",
+      "corrupted stream",
+      "request not implemented",
+      "invalid state",
+      "memory allocation failed"
+   };
+   if (error > 0 || error < -7)
+      return "unknown error";
+   else
+      return error_strings[-error];
+}
+
+const char *opus_get_version_string(void)
+{
+    return "libopus " PACKAGE_VERSION
+#ifdef OPUS_FIXED_POINT
+          "-fixed"
+#endif
+#ifdef FUZZING
+          "-fuzzing"
+#endif
+          ;
+}
diff --git a/drivers/opus/celt/celt.h b/drivers/opus/celt/celt.h
new file mode 100644
index 0000000000..5deea1f0aa
--- /dev/null
+++ b/drivers/opus/celt/celt.h
@@ -0,0 +1,218 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/**
+  @file celt.h
+  @brief Contains all the functions for encoding and decoding audio
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_H
+#define CELT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+#include "opus_custom.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "arch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CELTEncoder OpusCustomEncoder
+#define CELTDecoder OpusCustomDecoder
+#define CELTMode OpusCustomMode
+
+typedef struct {
+   int valid;
+   float tonality;
+   float tonality_slope;
+   float noisiness;
+   float activity;
+   float music_prob;
+   int        bandwidth;
+}AnalysisInfo;
+
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
+
+/* Encoder/decoder Requests */
+
+/* Expose this option again when variable framesize actually works */
+#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */
+
+
+#define CELT_SET_PREDICTION_REQUEST    10002
+/** Controls the use of interframe prediction.
+    0=Independent frames
+    1=Short term interframe prediction allowed
+    2=Long term prediction allowed
+ */
+#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_INPUT_CLIPPING_REQUEST    10004
+#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_AND_CLEAR_ERROR_REQUEST   10007
+#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x)
+
+#define CELT_SET_CHANNELS_REQUEST    10008
+#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x)
+
+
+/* Internal */
+#define CELT_SET_START_BAND_REQUEST    10010
+#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_END_BAND_REQUEST    10012
+#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x)
+
+#define CELT_GET_MODE_REQUEST    10015
+/** Get the CELTMode used by an encoder or decoder */
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
+
+#define CELT_SET_SIGNALLING_REQUEST    10016
+#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_TONALITY_REQUEST    10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_SLOPE_REQUEST    10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_ANALYSIS_REQUEST    10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
+
+#define OPUS_SET_LFE_REQUEST    10024
+#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
+
+#define OPUS_SET_ENERGY_MASK_REQUEST    10026
+#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
+
+/* Encoder stuff */
+
+int celt_encoder_get_size(int channels);
+
+int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch);
+
+
+
+/* Decoder stuff */
+
+int celt_decoder_get_size(int channels);
+
+
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
+
+int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);
+
+#define celt_encoder_ctl opus_custom_encoder_ctl
+#define celt_decoder_ctl opus_custom_decoder_ctl
+
+
+#ifdef CUSTOM_MODES
+#define OPUS_CUSTOM_NOSTATIC
+#else
+#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE
+#endif
+
+static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
+/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
+static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
+
+static const unsigned char tapset_icdf[3]={2,1,0};
+
+#ifdef CUSTOM_MODES
+static const unsigned char toOpusTable[20] = {
+      0xE0, 0xE8, 0xF0, 0xF8,
+      0xC0, 0xC8, 0xD0, 0xD8,
+      0xA0, 0xA8, 0xB0, 0xB8,
+      0x00, 0x00, 0x00, 0x00,
+      0x80, 0x88, 0x90, 0x98,
+};
+
+static const unsigned char fromOpusTable[16] = {
+      0x80, 0x88, 0x90, 0x98,
+      0x40, 0x48, 0x50, 0x58,
+      0x20, 0x28, 0x30, 0x38,
+      0x00, 0x08, 0x10, 0x18
+};
+
+static OPUS_INLINE int toOpus(unsigned char c)
+{
+   int ret=0;
+   if (c<0xA0)
+      ret = toOpusTable[c>>3];
+   if (ret == 0)
+      return -1;
+   else
+      return ret|(c&0x7);
+}
+
+static OPUS_INLINE int fromOpus(unsigned char c)
+{
+   if (c<0x80)
+      return -1;
+   else
+      return fromOpusTable[(c>>3)-16] | (c&0x7);
+}
+#endif /* CUSTOM_MODES */
+
+#define COMBFILTER_MAXPERIOD 1024
+#define COMBFILTER_MINPERIOD 15
+
+extern const signed char tf_select_table[4][8];
+
+int resampling_factor(opus_int32 rate);
+
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
+
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap);
+
+void init_caps(const CELTMode *m,int *cap,int LM,int C);
+
+#ifdef RESYNTH
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
+
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CELT_H */
diff --git a/drivers/opus/celt/celt_decoder.c b/drivers/opus/celt/celt_decoder.c
new file mode 100644
index 0000000000..93791feab4
--- /dev/null
+++ b/drivers/opus/celt/celt_decoder.c
@@ -0,0 +1,1195 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define CELT_DECODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "opus_modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+/**********************************************************************/
+/*                                                                    */
+/*                             DECODER                                */
+/*                                                                    */
+/**********************************************************************/
+#define DECODE_BUFFER_SIZE 2048
+
+/** Decoder state
+ @brief Decoder state
+ */
+struct OpusCustomDecoder {
+   const OpusCustomMode *mode;
+   int overlap;
+   int channels;
+   int stream_channels;
+
+   int downsample;
+   int start, end;
+   int signalling;
+   int arch;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define DECODER_RESET_START rng
+
+   opus_uint32 rng;
+   int error;
+   int last_pitch_index;
+   int loss_count;
+   int postfilter_period;
+   int postfilter_period_old;
+   opus_val16 postfilter_gain;
+   opus_val16 postfilter_gain_old;
+   int postfilter_tapset;
+   int postfilter_tapset_old;
+
+   celt_sig preemph_memD[2];
+
+   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
+   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
+   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
+   /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
+};
+
+int celt_decoder_get_size(int channels)
+{
+   const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+   return opus_custom_decoder_get_size(mode, channels);
+}
+
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTDecoder)
+            + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
+            + channels*LPC_ORDER*sizeof(opus_val16)
+            + 4*2*mode->nbEBands*sizeof(opus_val16);
+   return size;
+}
+
+#ifdef CUSTOM_MODES
+CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
+{
+   int ret;
+   CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
+   ret = opus_custom_decoder_init(st, mode, channels);
+   if (ret != OPUS_OK)
+   {
+      opus_custom_decoder_destroy(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+#endif /* CUSTOM_MODES */
+
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
+{
+   int ret;
+   ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
+   if (ret != OPUS_OK)
+      return ret;
+   st->downsample = resampling_factor(sampling_rate);
+   if (st->downsample==0)
+      return OPUS_BAD_ARG;
+   else
+      return OPUS_OK;
+}
+
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
+{
+   if (channels < 0 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+
+   st->downsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+   st->arch = opus_select_arch();
+
+   st->loss_count = 0;
+
+   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_decoder_destroy(CELTDecoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+
+static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
+{
+#ifdef OPUS_FIXED_POINT
+   x = PSHR32(x, SIG_SHIFT);
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return EXTRACT16(x);
+#else
+   return (opus_val16)x;
+#endif
+}
+
+#ifndef RESYNTH
+static
+#endif
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+{
+   int c;
+   int Nd;
+   int apply_downsampling=0;
+   opus_val16 coef0;
+
+   coef0 = coef[0];
+   Nd = N/downsample;
+   c=0; do {
+      int j;
+      celt_sig * OPUS_RESTRICT x;
+      opus_val16  * OPUS_RESTRICT y;
+      celt_sig m = mem[c];
+      x =in[c];
+      y = pcm+c;
+#ifdef CUSTOM_MODES
+      if (coef[1] != 0)
+      {
+         opus_val16 coef1 = coef[1];
+         opus_val16 coef3 = coef[3];
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp)
+                          - MULT16_32_Q15(coef1, x[j]);
+            tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else
+#endif
+      if (downsample>1)
+      {
+         /* Shortcut for the standard (non-custom modes) case */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else {
+         /* Shortcut for the standard (non-custom modes) case */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp);
+            y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+         }
+      }
+      mem[c] = m;
+
+      if (apply_downsampling)
+      {
+         /* Perform down-sampling */
+         for (j=0;j<Nd;j++)
+            y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+      }
+   } while (++c<C);
+}
+
+/** Compute the IMDCT and apply window for all sub-frames and
+    all channels in a frame */
+#ifndef RESYNTH
+static
+#endif
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+{
+   int b, c;
+   int B;
+   int N;
+   int shift;
+   const int overlap = OVERLAP(mode);
+
+   if (shortBlocks)
+   {
+      B = shortBlocks;
+      N = mode->shortMdctSize;
+      shift = mode->maxLM;
+   } else {
+      B = 1;
+      N = mode->shortMdctSize<<LM;
+      shift = mode->maxLM-LM;
+   }
+   c=0; do {
+      /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
+   } while (++c<C);
+}
+
+static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
+{
+   int i, curr, tf_select;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+
+   budget = dec->storage*8;
+   tell = ec_tell(dec);
+   logp = isTransient ? 2 : 4;
+   tf_select_rsv = LM>0 && tell+logp+1<=budget;
+   budget -= tf_select_rsv;
+   tf_changed = curr = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         curr ^= ec_dec_bit_logp(dec, logp);
+         tell = ec_tell(dec);
+         tf_changed |= curr;
+      }
+      tf_res[i] = curr;
+      logp = isTransient ? 4 : 5;
+   }
+   tf_select = 0;
+   if (tf_select_rsv &&
+     tf_select_table[LM][4*isTransient+0+tf_changed] !=
+     tf_select_table[LM][4*isTransient+2+tf_changed])
+   {
+      tf_select = ec_dec_bit_logp(dec, 1);
+   }
+   for (i=start;i<end;i++)
+   {
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   }
+}
+
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+   current value corresponds to a pitch of 66.67 Hz. */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+   pitch of 480 Hz. */
+#define PLC_PITCH_LAG_MIN (100)
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
+{
+   int c;
+   int i;
+   const int C = st->channels;
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   int start;
+   int downsample;
+   int loss_count;
+   int noise_based;
+   const opus_int16 *eBands;
+   VARDECL(celt_sig, scratch);
+   SAVE_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<C);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
+   oldBandE = lpc+C*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+
+   loss_count = st->loss_count;
+   start = st->start;
+   downsample = st->downsample;
+   noise_based = loss_count >= 5 || start != 0;
+   ALLOC(scratch, noise_based?N*C:N, celt_sig);
+   if (noise_based)
+   {
+      /* Noise-based PLC/CNG */
+      celt_sig *freq;
+      VARDECL(celt_norm, X);
+      opus_uint32 seed;
+      opus_val16 *plcLogE;
+      int end;
+      int effEnd;
+
+      end = st->end;
+      effEnd = IMAX(start, IMIN(end, mode->effEBands));
+
+      /* Share the interleaved signal MDCT coefficient buffer with the
+         deemphasis scratch buffer. */
+      freq = scratch;
+      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+
+      if (loss_count >= 5)
+         plcLogE = backgroundLogE;
+      else {
+         /* Energy decay */
+         opus_val16 decay = loss_count==0 ?
+               QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+         c=0; do
+         {
+            for (i=start;i<end;i++)
+               oldBandE[c*nbEBands+i] -= decay;
+         } while (++c<C);
+         plcLogE = oldBandE;
+      }
+      seed = st->rng;
+      for (c=0;c<C;c++)
+      {
+         for (i=start;i<effEnd;i++)
+         {
+            int j;
+            int boffs;
+            int blen;
+            boffs = N*c+(eBands[i]<<LM);
+            blen = (eBands[i+1]-eBands[i])<<LM;
+            for (j=0;j<blen;j++)
+            {
+               seed = celt_lcg_rand(seed);
+               X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
+            }
+            renormalise_vector(X+boffs, blen, Q15ONE);
+         }
+      }
+      st->rng = seed;
+
+      denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
+
+      c=0; do {
+         int bound = eBands[effEnd]<<LM;
+         if (downsample!=1)
+            bound = IMIN(bound, N/downsample);
+         for (i=bound;i<N;i++)
+            freq[c*N+i] = 0;
+      } while (++c<C);
+      c=0; do {
+         OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
+               DECODE_BUFFER_SIZE-N+(overlap>>1));
+      } while (++c<C);
+      compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+   } else {
+      /* Pitch-based PLC */
+      const opus_val16 *window;
+      opus_val16 fade = Q15ONE;
+      int pitch_index;
+      VARDECL(opus_val32, etmp);
+      VARDECL(opus_val16, exc);
+
+      if (loss_count == 0)
+      {
+         VARDECL( opus_val16, lp_pitch_buf );
+         ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+         pitch_downsample(decode_mem, lp_pitch_buf,
+               DECODE_BUFFER_SIZE, C, st->arch);
+         pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+               DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
+         pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+         st->last_pitch_index = pitch_index;
+      } else {
+         pitch_index = st->last_pitch_index;
+         fade = QCONST16(.8f,15);
+      }
+
+      ALLOC(etmp, overlap, opus_val32);
+      ALLOC(exc, MAX_PERIOD, opus_val16);
+      window = mode->window;
+      c=0; do {
+         opus_val16 decay;
+         opus_val16 attenuation;
+         opus_val32 S1=0;
+         celt_sig *buf;
+         int extrapolation_offset;
+         int extrapolation_len;
+         int exc_length;
+         int j;
+
+         buf = decode_mem[c];
+         for (i=0;i<MAX_PERIOD;i++) {
+            exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
+         }
+
+         if (loss_count == 0)
+         {
+            opus_val32 ac[LPC_ORDER+1];
+            /* Compute LPC coefficients for the last MAX_PERIOD samples before
+               the first loss so we can work in the excitation-filter domain. */
+            _celt_autocorr(exc, ac, window, overlap,
+                   LPC_ORDER, MAX_PERIOD, st->arch);
+            /* Add a noise floor of -40 dB. */
+#ifdef OPUS_FIXED_POINT
+            ac[0] += SHR32(ac[0],13);
+#else
+            ac[0] *= 1.0001f;
+#endif
+            /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
+            for (i=1;i<=LPC_ORDER;i++)
+            {
+               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef OPUS_FIXED_POINT
+               ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+               ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
+#endif
+            }
+            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+         }
+         /* We want the excitation for 2 pitch periods in order to look for a
+            decaying signal, but we can't get more than MAX_PERIOD. */
+         exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+         /* Initialize the LPC history with the samples just before the start
+            of the region for which we're computing the excitation. */
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            for (i=0;i<LPC_ORDER;i++)
+            {
+               lpc_mem[i] =
+                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+            }
+            /* Compute the excitation for exc_length samples before the loss. */
+            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
+         }
+
+         /* Check if the waveform is decaying, and if so how fast.
+            We do this to avoid adding energy when concealing in a segment
+            with decaying energy. */
+         {
+            opus_val32 E1=1, E2=1;
+            int decay_length;
+#ifdef OPUS_FIXED_POINT
+            int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20);
+#endif
+            decay_length = exc_length>>1;
+            for (i=0;i<decay_length;i++)
+            {
+               opus_val16 e;
+               e = exc[MAX_PERIOD-decay_length+i];
+               E1 += SHR32(MULT16_16(e, e), shift);
+               e = exc[MAX_PERIOD-2*decay_length+i];
+               E2 += SHR32(MULT16_16(e, e), shift);
+            }
+            E1 = MIN32(E1, E2);
+            decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2));
+         }
+
+         /* Move the decoder memory one frame to the left to give us room to
+            add the data for the new frame. We ignore the overlap that extends
+            past the end of the buffer, because we aren't going to use it. */
+         OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N);
+
+         /* Extrapolate from the end of the excitation with a period of
+            "pitch_index", scaling down each period by an additional factor of
+            "decay". */
+         extrapolation_offset = MAX_PERIOD-pitch_index;
+         /* We need to extrapolate enough samples to cover a complete MDCT
+            window (including overlap/2 samples on both sides). */
+         extrapolation_len = N+overlap;
+         /* We also apply fading if this is not the first loss. */
+         attenuation = MULT16_16_Q15(fade, decay);
+         for (i=j=0;i<extrapolation_len;i++,j++)
+         {
+            opus_val16 tmp;
+            if (j >= pitch_index) {
+               j -= pitch_index;
+               attenuation = MULT16_16_Q15(attenuation, decay);
+            }
+            buf[DECODE_BUFFER_SIZE-N+i] =
+                  SHL32(EXTEND32(MULT16_16_Q15(attenuation,
+                        exc[extrapolation_offset+j])), SIG_SHIFT);
+            /* Compute the energy of the previously decoded signal whose
+               excitation we're copying. */
+            tmp = ROUND16(
+                  buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
+                  SIG_SHIFT);
+            S1 += SHR32(MULT16_16(tmp, tmp), 8);
+         }
+
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            /* Copy the last decoded samples (prior to the overlap region) to
+               synthesis filter memory so we can have a continuous signal. */
+            for (i=0;i<LPC_ORDER;i++)
+               lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+            /* Apply the synthesis filter to convert the excitation back into
+               the signal domain. */
+            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+                  lpc_mem);
+         }
+
+         /* Check if the synthesis energy is higher than expected, which can
+            happen with the signal changes during our window. If so,
+            attenuate. */
+         {
+            opus_val32 S2=0;
+            for (i=0;i<extrapolation_len;i++)
+            {
+               opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
+               S2 += SHR32(MULT16_16(tmp, tmp), 8);
+            }
+            /* This checks for an "explosion" in the synthesis. */
+#ifdef OPUS_FIXED_POINT
+            if (!(S1 > SHR32(S2,2)))
+#else
+            /* The float test is written this way to catch NaNs in the output
+               of the IIR filter at the same time. */
+            if (!(S1 > 0.2f*S2))
+#endif
+            {
+               for (i=0;i<extrapolation_len;i++)
+                  buf[DECODE_BUFFER_SIZE-N+i] = 0;
+            } else if (S1 < S2)
+            {
+               opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
+               for (i=0;i<overlap;i++)
+               {
+                  opus_val16 tmp_g = Q15ONE
+                        - MULT16_16_Q15(window[i], Q15ONE-ratio);
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+               for (i=overlap;i<extrapolation_len;i++)
+               {
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+            }
+         }
+
+         /* Apply the pre-filter to the MDCT overlap for the next frame because
+            the post-filter will be re-applied in the decoder after the MDCT
+            overlap. */
+         comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
+              st->postfilter_period, st->postfilter_period, overlap,
+              -st->postfilter_gain, -st->postfilter_gain,
+              st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
+
+         /* Simulate TDAC on the concealed audio so that it blends with the
+            MDCT of the next frame. */
+         for (i=0;i<overlap/2;i++)
+         {
+            buf[DECODE_BUFFER_SIZE+i] =
+               MULT16_32_Q15(window[i], etmp[overlap-1-i])
+               + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
+         }
+      } while (++c<C);
+   }
+
+   deemphasis(out_syn, pcm, N, C, downsample,
+         mode->preemph, st->preemph_memD, scratch);
+
+   st->loss_count = loss_count+1;
+
+   RESTORE_STACK;
+}
+
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
+{
+   int c, i, N;
+   int spread_decision;
+   opus_int32 bits;
+   ec_dec _dec;
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_norm, X);
+   VARDECL(int, fine_quant);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+
+   int shortBlocks;
+   int isTransient;
+   int intra_ener;
+   const int CC = st->channels;
+   int LM, M;
+   int effEnd;
+   int codedBands;
+   int alloc_trim;
+   int postfilter_pitch;
+   opus_val16 postfilter_gain;
+   int intensity=0;
+   int dual_stereo=0;
+   opus_int32 total_bits;
+   opus_int32 balance;
+   opus_int32 tell;
+   int dynalloc_logp;
+   int postfilter_tapset;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence;
+   int C = st->stream_channels;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   ALLOC_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   frame_size *= st->downsample;
+
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+   } while (++c<CC);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
+   oldBandE = lpc+CC*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+
+#ifdef CUSTOM_MODES
+   if (st->signalling && data!=NULL)
+   {
+      int data0=data[0];
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         data0 = fromOpus(data0);
+         if (data0<0)
+            return OPUS_INVALID_PACKET;
+      }
+      st->end = IMAX(1, mode->effEBands-2*(data0>>5));
+      LM = (data0>>3)&0x3;
+      C = 1 + ((data0>>2)&0x1);
+      data++;
+      len--;
+      if (LM>mode->maxLM)
+         return OPUS_INVALID_PACKET;
+      if (frame_size < mode->shortMdctSize<<LM)
+         return OPUS_BUFFER_TOO_SMALL;
+      else
+         frame_size = mode->shortMdctSize<<LM;
+   } else {
+#else
+   {
+#endif
+      for (LM=0;LM<=mode->maxLM;LM++)
+         if (mode->shortMdctSize<<LM==frame_size)
+            break;
+      if (LM>mode->maxLM)
+         return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+
+   if (len<0 || len>1275 || pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   N = M*mode->shortMdctSize;
+
+   effEnd = st->end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+
+   if (data == NULL || len<=1)
+   {
+      celt_decode_lost(st, pcm, N, LM);
+      RESTORE_STACK;
+      return frame_size/st->downsample;
+   }
+
+   if (dec == NULL)
+   {
+      ec_dec_init(&_dec,(unsigned char*)data,len);
+      dec = &_dec;
+   }
+
+   if (C==1)
+   {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]);
+   }
+
+   total_bits = len*8;
+   tell = ec_tell(dec);
+
+   if (tell >= total_bits)
+      silence = 1;
+   else if (tell==1)
+      silence = ec_dec_bit_logp(dec, 15);
+   else
+      silence = 0;
+   if (silence)
+   {
+      /* Pretend we've read all the remaining bits */
+      tell = len*8;
+      dec->nbits_total+=tell-ec_tell(dec);
+   }
+
+   postfilter_gain = 0;
+   postfilter_pitch = 0;
+   postfilter_tapset = 0;
+   if (st->start==0 && tell+16 <= total_bits)
+   {
+      if(ec_dec_bit_logp(dec, 1))
+      {
+         int qg, octave;
+         octave = ec_dec_uint(dec, 6);
+         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
+         qg = ec_dec_bits(dec, 3);
+         if (ec_tell(dec)+2<=total_bits)
+            postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
+         postfilter_gain = QCONST16(.09375f,15)*(qg+1);
+      }
+      tell = ec_tell(dec);
+   }
+
+   if (LM > 0 && tell+3 <= total_bits)
+   {
+      isTransient = ec_dec_bit_logp(dec, 3);
+      tell = ec_tell(dec);
+   }
+   else
+      isTransient = 0;
+
+   if (isTransient)
+      shortBlocks = M;
+   else
+      shortBlocks = 0;
+
+   /* Decode the global flags (first symbols in the stream) */
+   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+   /* Get band energies */
+   unquant_coarse_energy(mode, st->start, st->end, oldBandE,
+         intra_ener, dec, C, LM);
+
+   ALLOC(tf_res, nbEBands, int);
+   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
+
+   tell = ec_tell(dec);
+   spread_decision = SPREAD_NORMAL;
+   if (tell+4 <= total_bits)
+      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
+
+   ALLOC(cap, nbEBands, int);
+
+   init_caps(mode,cap,LM,C);
+
+   ALLOC(offsets, nbEBands, int);
+
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   tell = ec_tell_frac(dec);
+   for (i=st->start;i<st->end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
+      {
+         int flag;
+         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
+         tell = ec_tell_frac(dec);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_bits -= quanta;
+         dynalloc_loop_logp = 1;
+      }
+      offsets[i] = boost;
+      /* Making dynalloc more likely */
+      if (boost>0)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+   }
+
+   ALLOC(fine_quant, nbEBands, int);
+   alloc_trim = tell+(6<<BITRES) <= total_bits ?
+         ec_dec_icdf(dec, trim_icdf, 7) : 5;
+
+   bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+
+   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
+
+   unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
+
+   /* Decode fixed codebook */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+
+   quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
+
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = ec_dec_bits(dec, 1);
+   }
+
+   unquant_energy_finalise(mode, st->start, st->end, oldBandE,
+         fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
+
+   if (anti_collapse_on)
+      anti_collapse(mode, X, collapse_masks, LM, C, N,
+            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+
+   ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
+
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+      for (i=0;i<C*N;i++)
+         freq[i] = 0;
+   } else {
+      /* Synthesis */
+      denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+   }
+   c=0; do {
+      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+   } while (++c<CC);
+
+   c=0; do {
+      int bound = M*eBands[effEnd];
+      if (st->downsample!=1)
+         bound = IMIN(bound, N/st->downsample);
+      for (i=bound;i<N;i++)
+         freq[c*N+i] = 0;
+   } while (++c<C);
+
+   c=0; do {
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<CC);
+
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<N;i++)
+         freq[N+i] = freq[i];
+   }
+   if (CC==1&&C==2)
+   {
+      for (i=0;i<N;i++)
+         freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
+   }
+
+   /* Compute inverse MDCTs */
+   compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
+
+   c=0; do {
+      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
+      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
+      comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
+            st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
+            mode->window, overlap);
+      if (LM!=0)
+         comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
+               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
+               mode->window, overlap);
+
+   } while (++c<CC);
+   st->postfilter_period_old = st->postfilter_period;
+   st->postfilter_gain_old = st->postfilter_gain;
+   st->postfilter_tapset_old = st->postfilter_tapset;
+   st->postfilter_period = postfilter_pitch;
+   st->postfilter_gain = postfilter_gain;
+   st->postfilter_tapset = postfilter_tapset;
+   if (LM!=0)
+   {
+      st->postfilter_period_old = st->postfilter_period;
+      st->postfilter_gain_old = st->postfilter_gain;
+      st->postfilter_tapset_old = st->postfilter_tapset;
+   }
+
+   if (C==1) {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[nbEBands+i]=oldBandE[i];
+   }
+
+   /* In case start or end were to change */
+   if (!isTransient)
+   {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+      for (i=0;i<2*nbEBands;i++)
+         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
+   } else {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=st->end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<2);
+   st->rng = dec->rng;
+
+   /* We reuse freq[] as scratch space for the de-emphasis */
+   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+   st->loss_count = 0;
+   RESTORE_STACK;
+   if (ec_tell(dec) > 8*len)
+      return OPUS_INTERNAL_ERROR;
+   if(ec_get_error(dec))
+      st->error = 1;
+   return frame_size/st->downsample;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef OPUS_FIXED_POINT
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(opus_int16, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+
+   ALLOC(out, C*N, opus_int16);
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j]=out[j]*(1.f/32768.f);
+
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+
+#else
+
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+}
+
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(celt_sig, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+   ALLOC(out, C*N, celt_sig);
+
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j] = FLOAT2INT16 (out[j]);
+
+   RESTORE_STACK;
+   return ret;
+}
+
+#endif
+#endif /* CUSTOM_MODES */
+
+int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
+{
+   va_list ap;
+
+   va_start(ap, request);
+   switch (request)
+   {
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands)
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands)
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
+      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value=st->error;
+         st->error = 0;
+      }
+      break;
+      case OPUS_GET_LOOKAHEAD_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->overlap/st->downsample;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
+         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
+         oldBandE = lpc+st->channels*LPC_ORDER;
+         oldLogE = oldBandE + 2*st->mode->nbEBands;
+         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
+               opus_custom_decoder_get_size(st->mode, st->channels)-
+               ((char*)&st->DECODER_RESET_START - (char*)st));
+         for (i=0;i<2*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      break;
+      case OPUS_GET_PITCH_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->postfilter_period;
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      default:
+         goto bad_request;
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+      va_end(ap);
+  return OPUS_UNIMPLEMENTED;
+}
diff --git a/drivers/opus/celt/celt_encoder.c b/drivers/opus/celt/celt_encoder.c
new file mode 100644
index 0000000000..a61e41f42d
--- /dev/null
+++ b/drivers/opus/celt/celt_encoder.c
@@ -0,0 +1,2353 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define CELT_ENCODER_C
+
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "opus_modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+
+/** Encoder state
+ @brief Encoder state
+ */
+struct OpusCustomEncoder {
+   const OpusCustomMode *mode;     /**< Mode used by the encoder */
+   int overlap;
+   int channels;
+   int stream_channels;
+
+   int force_intra;
+   int clip;
+   int disable_pf;
+   int complexity;
+   int upsample;
+   int start, end;
+
+   opus_int32 bitrate;
+   int vbr;
+   int signalling;
+   int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
+   int loss_rate;
+   int lsb_depth;
+   int variable_duration;
+   int lfe;
+   int arch;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define ENCODER_RESET_START rng
+
+   opus_uint32 rng;
+   int spread_decision;
+   opus_val32 delayedIntra;
+   int tonal_average;
+   int lastCodedBands;
+   int hf_average;
+   int tapset_decision;
+
+   int prefilter_period;
+   opus_val16 prefilter_gain;
+   int prefilter_tapset;
+#ifdef RESYNTH
+   int prefilter_period_old;
+   opus_val16 prefilter_gain_old;
+   int prefilter_tapset_old;
+#endif
+   int consec_transient;
+   AnalysisInfo analysis;
+
+   opus_val32 preemph_memE[2];
+   opus_val32 preemph_memD[2];
+
+   /* VBR-related parameters */
+   opus_int32 vbr_reservoir;
+   opus_int32 vbr_drift;
+   opus_int32 vbr_offset;
+   opus_int32 vbr_count;
+   opus_val32 overlap_max;
+   opus_val16 stereo_saving;
+   int intensity;
+   opus_val16 *energy_mask;
+   opus_val16 spec_avg;
+
+#ifdef RESYNTH
+   /* +MAX_PERIOD/2 to make space for overlap */
+   celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2];
+#endif
+
+   celt_sig in_mem[1]; /* Size = channels*mode->overlap */
+   /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_MAXPERIOD */
+   /* opus_val16 oldBandE[],     Size = channels*mode->nbEBands */
+   /* opus_val16 oldLogE[],      Size = channels*mode->nbEBands */
+   /* opus_val16 oldLogE2[],     Size = channels*mode->nbEBands */
+};
+
+int celt_encoder_get_size(int channels)
+{
+   CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+   return opus_custom_encoder_get_size(mode, channels);
+}
+
+OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTEncoder)
+         + (channels*mode->overlap-1)*sizeof(celt_sig)    /* celt_sig in_mem[channels*mode->overlap]; */
+         + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
+         + 3*channels*mode->nbEBands*sizeof(opus_val16);  /* opus_val16 oldBandE[channels*mode->nbEBands]; */
+                                                          /* opus_val16 oldLogE[channels*mode->nbEBands]; */
+                                                          /* opus_val16 oldLogE2[channels*mode->nbEBands]; */
+   return size;
+}
+
+#ifdef CUSTOM_MODES
+CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
+{
+   int ret;
+   CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
+   /* init will handle the NULL case */
+   ret = opus_custom_encoder_init(st, mode, channels);
+   if (ret != OPUS_OK)
+   {
+      opus_custom_encoder_destroy(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+#endif /* CUSTOM_MODES */
+
+static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
+                                         int channels, int arch)
+{
+   if (channels < 0 || channels > 2)
+      return OPUS_BAD_ARG;
+
+   if (st==NULL || mode==NULL)
+      return OPUS_ALLOC_FAIL;
+
+   OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
+
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+
+   st->upsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+
+   st->arch = arch;
+
+   st->constrained_vbr = 1;
+   st->clip = 1;
+
+   st->bitrate = OPUS_BITRATE_MAX;
+   st->vbr = 0;
+   st->force_intra  = 0;
+   st->complexity = 5;
+   st->lsb_depth=24;
+
+   opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
+
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
+{
+   return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch());
+}
+#endif
+
+int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
+                      int arch)
+{
+   int ret;
+   ret = opus_custom_encoder_init_arch(st,
+           opus_custom_mode_create(48000, 960, NULL), channels, arch);
+   if (ret != OPUS_OK)
+      return ret;
+   st->upsample = resampling_factor(sampling_rate);
+   return OPUS_OK;
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_encoder_destroy(CELTEncoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+
+
+static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
+                              opus_val16 *tf_estimate, int *tf_chan)
+{
+   int i;
+   VARDECL(opus_val16, tmp);
+   opus_val32 mem0,mem1;
+   int is_transient = 0;
+   opus_int32 mask_metric = 0;
+   int c;
+   opus_val16 tf_max;
+   int len2;
+   /* Table of 6*64/x, trained on real data to minimize the average error */
+   static const unsigned char inv_table[128] = {
+         255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
+          23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12,
+          12, 12, 11, 11, 11, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
+           8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,
+           6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,
+           5,  5,  5,  5,  5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+           4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  3,  3,
+           3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,
+   };
+   SAVE_STACK;
+   ALLOC(tmp, len, opus_val16);
+
+   len2=len/2;
+   for (c=0;c<C;c++)
+   {
+      opus_val32 mean;
+      opus_int32 unmask=0;
+      opus_val32 norm;
+      opus_val16 maxE;
+      mem0=0;
+      mem1=0;
+      /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x,y;
+         x = SHR32(in[i+c*len],SIG_SHIFT);
+         y = ADD32(mem0, x);
+#ifdef OPUS_FIXED_POINT
+         mem0 = mem1 + y - SHL32(x,1);
+         mem1 = x - SHR32(y,1);
+#else
+         mem0 = mem1 + y - 2*x;
+         mem1 = x - .5f*y;
+#endif
+         tmp[i] = EXTRACT16(SHR32(y,2));
+         /*printf("%f ", tmp[i]);*/
+      }
+      /*printf("\n");*/
+      /* First few samples are bad because we don't propagate the memory */
+      for (i=0;i<12;i++)
+         tmp[i] = 0;
+
+#ifdef OPUS_FIXED_POINT
+      /* Normalize tmp to max range */
+      {
+         int shift=0;
+         shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len));
+         if (shift!=0)
+         {
+            for (i=0;i<len;i++)
+               tmp[i] = SHL16(tmp[i], shift);
+         }
+      }
+#endif
+
+      mean=0;
+      mem0=0;
+      /* Grouping by two to reduce complexity */
+      /* Forward pass to compute the post-echo threshold*/
+      for (i=0;i<len2;i++)
+      {
+         opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16);
+         mean += x2;
+#ifdef OPUS_FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         tmp[i] = mem0 + PSHR32(x2-mem0,4);
+#else
+         tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0);
+#endif
+         mem0 = tmp[i];
+      }
+
+      mem0=0;
+      maxE=0;
+      /* Backward pass to compute the pre-echo threshold */
+      for (i=len2-1;i>=0;i--)
+      {
+#ifdef OPUS_FIXED_POINT
+         /* FIXME: Use PSHR16() instead */
+         tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
+#else
+         tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
+#endif
+         mem0 = tmp[i];
+         maxE = MAX16(maxE, mem0);
+      }
+      /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/
+
+      /* Compute the ratio of the "frame energy" over the harmonic mean of the energy.
+         This essentially corresponds to a bitrate-normalized temporal noise-to-mask
+         ratio */
+
+      /* As a compromise with the old transient detector, frame energy is the
+         geometric mean of the energy and half the max */
+#ifdef OPUS_FIXED_POINT
+      /* Costs two sqrt() to avoid overflows */
+      mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1)));
+#else
+      mean = celt_sqrt(mean * maxE*.5*len2);
+#endif
+      /* Inverse of the mean energy in Q15+6 */
+      norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1));
+      /* Compute harmonic mean discarding the unreliable boundaries
+         The data is smooth, so we only take 1/4th of the samples */
+      unmask=0;
+      for (i=12;i<len2-5;i+=4)
+      {
+         int id;
+#ifdef OPUS_FIXED_POINT
+         id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */
+#else
+         id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */
+#endif
+         unmask += inv_table[id];
+      }
+      /*printf("%d\n", unmask);*/
+      /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */
+      unmask = 64*unmask*4/(6*(len2-17));
+      if (unmask>mask_metric)
+      {
+         *tf_chan = c;
+         mask_metric = unmask;
+      }
+   }
+   is_transient = mask_metric>200;
+
+   /* Arbitrary metric for VBR boost */
+   tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
+   /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
+   *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
+   /*printf("%d %f\n", tf_max, mask_metric);*/
+   RESTORE_STACK;
+#ifdef FUZZING
+   is_transient = rand()&0x1;
+#endif
+   /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/
+   return is_transient;
+}
+
+/* Looks for sudden increases of energy to decide whether we need to patch
+   the transient decision */
+int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
+      int end, int C)
+{
+   int i, c;
+   opus_val32 mean_diff=0;
+   opus_val16 spread_old[26];
+   /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to
+      avoid false detection caused by irrelevant bands */
+   if (C==1)
+   {
+      spread_old[0] = oldE[0];
+      for (i=1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
+   } else {
+      spread_old[0] = MAX16(oldE[0],oldE[nbEBands]);
+      for (i=1;i<end;i++)
+         spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
+                               MAX16(oldE[i],oldE[i+nbEBands]));
+   }
+   for (i=end-2;i>=0;i--)
+      spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
+   /* Compute mean increase */
+   c=0; do {
+      for (i=2;i<end-1;i++)
+      {
+         opus_val16 x1, x2;
+         x1 = MAX16(0, newE[i]);
+         x2 = MAX16(0, spread_old[i]);
+         mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
+      }
+   } while (++c<C);
+   mean_diff = DIV32(mean_diff, C*(end-3));
+   /*printf("%f %f %d\n", mean_diff, max_diff, count);*/
+   return mean_diff > QCONST16(1.f, DB_SHIFT);
+}
+
+/** Apply window and compute the MDCT for all sub-frames and
+    all channels in a frame */
+static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
+                          celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
+{
+   const int overlap = OVERLAP(mode);
+   int N;
+   int B;
+   int shift;
+   int i, b, c;
+   if (shortBlocks)
+   {
+      B = shortBlocks;
+      N = mode->shortMdctSize;
+      shift = mode->maxLM;
+   } else {
+      B = 1;
+      N = mode->shortMdctSize<<LM;
+      shift = mode->maxLM-LM;
+   }
+   c=0; do {
+      for (b=0;b<B;b++)
+      {
+         /* Interleaving the sub-frames while doing the MDCTs */
+         clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
+      }
+   } while (++c<CC);
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<B*N;i++)
+         out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i]));
+   }
+   if (upsample != 1)
+   {
+      c=0; do
+      {
+         int bound = B*N/upsample;
+         for (i=0;i<bound;i++)
+            out[c*B*N+i] *= upsample;
+         for (;i<B*N;i++)
+            out[c*B*N+i] = 0;
+      } while (++c<C);
+   }
+}
+
+
+void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
+                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
+{
+   int i;
+   opus_val16 coef0;
+   celt_sig m;
+   int Nu;
+
+   coef0 = coef[0];
+
+
+   Nu = N/upsample;
+   if (upsample!=1)
+   {
+      for (i=0;i<N;i++)
+         inp[i] = 0;
+   }
+   for (i=0;i<Nu;i++)
+   {
+      celt_sig x;
+
+      x = SCALEIN(pcmp[CC*i]);
+#ifndef OPUS_FIXED_POINT
+      /* Replace NaNs with zeros */
+      if (!(x==x))
+         x = 0;
+#endif
+      inp[i*upsample] = x;
+   }
+
+#ifndef OPUS_FIXED_POINT
+   if (clip)
+   {
+      /* Clip input to avoid encoding non-portable files */
+      for (i=0;i<Nu;i++)
+         inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample]));
+   }
+#else
+   (void)clip; /* Avoids a warning about clip being unused. */
+#endif
+   m = *mem;
+#ifdef CUSTOM_MODES
+   if (coef[1] != 0)
+   {
+      opus_val16 coef1 = coef[1];
+      opus_val16 coef2 = coef[2];
+      for (i=0;i<N;i++)
+      {
+         celt_sig x, tmp;
+         x = inp[i];
+         /* Apply pre-emphasis */
+         tmp = MULT16_16(coef2, x);
+         inp[i] = tmp + m;
+         m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp);
+      }
+   } else
+#endif
+   {
+      for (i=0;i<N;i++)
+      {
+         celt_sig x;
+         x = SHL32(inp[i], SIG_SHIFT);
+         /* Apply pre-emphasis */
+         inp[i] = x + m;
+         m = - MULT16_32_Q15(coef0, x);
+      }
+   }
+   *mem = m;
+}
+
+
+
+static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
+{
+   int i;
+   opus_val32 L1;
+   L1 = 0;
+   for (i=0;i<N;i++)
+      L1 += EXTEND32(ABS16(tmp[i]));
+   /* When in doubt, prefer good freq resolution */
+   L1 = MAC16_32_Q15(L1, LM*bias, L1);
+   return L1;
+
+}
+
+static int tf_analysis(const CELTMode *m, int len, int isTransient,
+      int *tf_res, int lambda, celt_norm *X, int N0, int LM,
+      int *tf_sum, opus_val16 tf_estimate, int tf_chan)
+{
+   int i;
+   VARDECL(int, metric);
+   int cost0;
+   int cost1;
+   VARDECL(int, path0);
+   VARDECL(int, path1);
+   VARDECL(celt_norm, tmp);
+   VARDECL(celt_norm, tmp_1);
+   int sel;
+   int selcost[2];
+   int tf_select=0;
+   opus_val16 bias;
+
+   SAVE_STACK;
+   bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate));
+   /*printf("%f ", bias);*/
+
+   ALLOC(metric, len, int);
+   ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+   ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+   ALLOC(path0, len, int);
+   ALLOC(path1, len, int);
+
+   *tf_sum = 0;
+   for (i=0;i<len;i++)
+   {
+      int j, k, N;
+      int narrow;
+      opus_val32 L1, best_L1;
+      int best_level=0;
+      N = (m->eBands[i+1]-m->eBands[i])<<LM;
+      /* band is too narrow to be split down to LM=-1 */
+      narrow = (m->eBands[i+1]-m->eBands[i])==1;
+      for (j=0;j<N;j++)
+         tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
+      /* Just add the right channel if we're in stereo */
+      /*if (C==2)
+         for (j=0;j<N;j++)
+            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
+      L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
+      best_L1 = L1;
+      /* Check the -1 case for transients */
+      if (isTransient && !narrow)
+      {
+         for (j=0;j<N;j++)
+            tmp_1[j] = tmp[j];
+         haar1(tmp_1, N>>LM, 1<<LM);
+         L1 = l1_metric(tmp_1, N, LM+1, bias);
+         if (L1<best_L1)
+         {
+            best_L1 = L1;
+            best_level = -1;
+         }
+      }
+      /*printf ("%f ", L1);*/
+      for (k=0;k<LM+!(isTransient||narrow);k++)
+      {
+         int B;
+
+         if (isTransient)
+            B = (LM-k-1);
+         else
+            B = k+1;
+
+         haar1(tmp, N>>k, 1<<k);
+
+         L1 = l1_metric(tmp, N, B, bias);
+
+         if (L1 < best_L1)
+         {
+            best_L1 = L1;
+            best_level = k+1;
+         }
+      }
+      /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
+      /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
+      if (isTransient)
+         metric[i] = 2*best_level;
+      else
+         metric[i] = -2*best_level;
+      *tf_sum += (isTransient ? LM : 0) - metric[i]/2;
+      /* For bands that can't be split to -1, set the metric to the half-way point to avoid
+         biasing the decision */
+      if (narrow && (metric[i]==0 || metric[i]==-2*LM))
+         metric[i]-=1;
+      /*printf("%d ", metric[i]);*/
+   }
+   /*printf("\n");*/
+   /* Search for the optimal tf resolution, including tf_select */
+   tf_select = 0;
+   for (sel=0;sel<2;sel++)
+   {
+      cost0 = 0;
+      cost1 = isTransient ? 0 : lambda;
+      for (i=1;i<len;i++)
+      {
+         int curr0, curr1;
+         curr0 = IMIN(cost0, cost1 + lambda);
+         curr1 = IMIN(cost0 + lambda, cost1);
+         cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+         cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
+      }
+      cost0 = IMIN(cost0, cost1);
+      selcost[sel]=cost0;
+   }
+   /* For now, we're conservative and only allow tf_select=1 for transients.
+    * If tests confirm it's useful for non-transients, we could allow it. */
+   if (selcost[1]<selcost[0] && isTransient)
+      tf_select=1;
+   cost0 = 0;
+   cost1 = isTransient ? 0 : lambda;
+   /* Viterbi forward pass */
+   for (i=1;i<len;i++)
+   {
+      int curr0, curr1;
+      int from0, from1;
+
+      from0 = cost0;
+      from1 = cost1 + lambda;
+      if (from0 < from1)
+      {
+         curr0 = from0;
+         path0[i]= 0;
+      } else {
+         curr0 = from1;
+         path0[i]= 1;
+      }
+
+      from0 = cost0 + lambda;
+      from1 = cost1;
+      if (from0 < from1)
+      {
+         curr1 = from0;
+         path1[i]= 0;
+      } else {
+         curr1 = from1;
+         path1[i]= 1;
+      }
+      cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+      cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
+   }
+   tf_res[len-1] = cost0 < cost1 ? 0 : 1;
+   /* Viterbi backward pass to check the decisions */
+   for (i=len-2;i>=0;i--)
+   {
+      if (tf_res[i+1] == 1)
+         tf_res[i] = path1[i+1];
+      else
+         tf_res[i] = path0[i+1];
+   }
+   /*printf("%d %f\n", *tf_sum, tf_estimate);*/
+   RESTORE_STACK;
+#ifdef FUZZING
+   tf_select = rand()&0x1;
+   tf_res[0] = rand()&0x1;
+   for (i=1;i<len;i++)
+      tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
+#endif
+   return tf_select;
+}
+
+static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
+{
+   int curr, i;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+   budget = enc->storage*8;
+   tell = ec_tell(enc);
+   logp = isTransient ? 2 : 4;
+   /* Reserve space to code the tf_select decision. */
+   tf_select_rsv = LM>0 && tell+logp+1 <= budget;
+   budget -= tf_select_rsv;
+   curr = tf_changed = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
+         tell = ec_tell(enc);
+         curr = tf_res[i];
+         tf_changed |= curr;
+      }
+      else
+         tf_res[i] = curr;
+      logp = isTransient ? 4 : 5;
+   }
+   /* Only code tf_select if it would actually make a difference. */
+   if (tf_select_rsv &&
+         tf_select_table[LM][4*isTransient+0+tf_changed]!=
+         tf_select_table[LM][4*isTransient+2+tf_changed])
+      ec_enc_bit_logp(enc, tf_select, 1);
+   else
+      tf_select = 0;
+   for (i=start;i<end;i++)
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
+}
+
+
+static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
+      const opus_val16 *bandLogE, int end, int LM, int C, int N0,
+      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
+      int intensity, opus_val16 surround_trim)
+{
+   int i;
+   opus_val32 diff=0;
+   int c;
+   int trim_index = 5;
+   opus_val16 trim = QCONST16(5.f, 8);
+   opus_val16 logXC, logXC2;
+   if (C==2)
+   {
+      opus_val16 sum = 0; /* Q10 */
+      opus_val16 minXC; /* Q10 */
+      /* Compute inter-channel correlation for low frequencies */
+      for (i=0;i<8;i++)
+      {
+         int j;
+         opus_val32 partial = 0;
+         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+            partial = MAC16_16(partial, X[j], X[N0+j]);
+         sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
+      }
+      sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
+      sum = MIN16(QCONST16(1.f, 10), ABS16(sum));
+      minXC = sum;
+      for (i=8;i<intensity;i++)
+      {
+         int j;
+         opus_val32 partial = 0;
+         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+            partial = MAC16_16(partial, X[j], X[N0+j]);
+         minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
+      }
+      minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
+      /*printf ("%f\n", sum);*/
+      if (sum > QCONST16(.995f,10))
+         trim_index-=4;
+      else if (sum > QCONST16(.92f,10))
+         trim_index-=3;
+      else if (sum > QCONST16(.85f,10))
+         trim_index-=2;
+      else if (sum > QCONST16(.8f,10))
+         trim_index-=1;
+      /* mid-side savings estimations based on the LF average*/
+      logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
+      /* mid-side savings estimations based on min correlation */
+      logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
+#ifdef OPUS_FIXED_POINT
+      /* Compensate for Q20 vs Q14 input and convert output to Q8 */
+      logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+      logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+#endif
+
+      trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
+      *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
+   }
+
+   /* Estimate spectral tilt */
+   c=0; do {
+      for (i=0;i<end-1;i++)
+      {
+         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);
+      }
+   } while (++c<C);
+   diff /= C*(end-1);
+   /*printf("%f\n", diff);*/
+   if (diff > QCONST16(2.f, DB_SHIFT))
+      trim_index--;
+   if (diff > QCONST16(8.f, DB_SHIFT))
+      trim_index--;
+   if (diff < -QCONST16(4.f, DB_SHIFT))
+      trim_index++;
+   if (diff < -QCONST16(10.f, DB_SHIFT))
+      trim_index++;
+   trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+   trim -= SHR16(surround_trim, DB_SHIFT-8);
+   trim -= 2*SHR16(tf_estimate, 14-8);
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid)
+   {
+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
+            (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
+   }
+#endif
+
+#ifdef OPUS_FIXED_POINT
+   trim_index = PSHR32(trim, 8);
+#else
+   trim_index = (int)floor(.5f+trim);
+#endif
+   if (trim_index<0)
+      trim_index = 0;
+   if (trim_index>10)
+      trim_index = 10;
+   /*printf("%d\n", trim_index);*/
+#ifdef FUZZING
+   trim_index = rand()%11;
+#endif
+   return trim_index;
+}
+
+static int stereo_analysis(const CELTMode *m, const celt_norm *X,
+      int LM, int N0)
+{
+   int i;
+   int thetas;
+   opus_val32 sumLR = EPSILON, sumMS = EPSILON;
+
+   /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
+   for (i=0;i<13;i++)
+   {
+      int j;
+      for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+      {
+         opus_val32 L, R, M, S;
+         /* We cast to 32-bit first because of the -32768 case */
+         L = EXTEND32(X[j]);
+         R = EXTEND32(X[N0+j]);
+         M = ADD32(L, R);
+         S = SUB32(L, R);
+         sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
+         sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
+      }
+   }
+   sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
+   thetas = 13;
+   /* We don't need thetas for lower bands with LM<=1 */
+   if (LM<=1)
+      thetas -= 8;
+   return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
+         > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
+}
+
+static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
+      int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
+      int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
+      int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc)
+{
+   int i, c;
+   opus_int32 tot_boost=0;
+   opus_val16 maxDepth;
+   VARDECL(opus_val16, follower);
+   VARDECL(opus_val16, noise_floor);
+   SAVE_STACK;
+   ALLOC(follower, C*nbEBands, opus_val16);
+   ALLOC(noise_floor, C*nbEBands, opus_val16);
+   for (i=0;i<nbEBands;i++)
+      offsets[i] = 0;
+   /* Dynamic allocation code */
+   maxDepth=-QCONST16(31.9f, DB_SHIFT);
+   for (i=0;i<end;i++)
+   {
+      /* Noise floor must take into account eMeans, the depth, the width of the bands
+         and the preemphasis filter (approx. square of bark band ID) */
+      noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i])
+            +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
+            +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
+   }
+   c=0;do
+   {
+      for (i=0;i<end;i++)
+         maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
+   } while (++c<C);
+   /* Make sure that dynamic allocation can't make us bust the budget */
+   if (effectiveBytes > 50 && LM>=1 && !lfe)
+   {
+      int last=0;
+      c=0;do
+      {
+         follower[c*nbEBands] = bandLogE2[c*nbEBands];
+         for (i=1;i<end;i++)
+         {
+            /* The last band to be at least 3 dB higher than the previous one
+               is the last we'll consider. Otherwise, we run into problems on
+               bandlimited signals. */
+            if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+               last=i;
+            follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
+         }
+         for (i=last-1;i>=0;i--)
+            follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
+         for (i=0;i<end;i++)
+            follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]);
+      } while (++c<C);
+      if (C==2)
+      {
+         for (i=start;i<end;i++)
+         {
+            /* Consider 24 dB "cross-talk" */
+            follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[         i]-QCONST16(4.f,DB_SHIFT));
+            follower[         i] = MAX16(follower[         i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT));
+            follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));
+         }
+      } else {
+         for (i=start;i<end;i++)
+         {
+            follower[i] = MAX16(0, bandLogE[i]-follower[i]);
+         }
+      }
+      for (i=start;i<end;i++)
+         follower[i] = MAX16(follower[i], surround_dynalloc[i]);
+      /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
+      if ((!vbr || constrained_vbr)&&!isTransient)
+      {
+         for (i=start;i<end;i++)
+            follower[i] = HALF16(follower[i]);
+      }
+      for (i=start;i<end;i++)
+      {
+         int width;
+         int boost;
+         int boost_bits;
+
+         if (i<8)
+            follower[i] *= 2;
+         if (i>=12)
+            follower[i] = HALF16(follower[i]);
+         follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
+
+         width = C*(eBands[i+1]-eBands[i])<<LM;
+         if (width<6)
+         {
+            boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT);
+            boost_bits = boost*width<<BITRES;
+         } else if (width > 48) {
+            boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
+            boost_bits = (boost*width<<BITRES)/8;
+         } else {
+            boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
+            boost_bits = boost*6<<BITRES;
+         }
+         /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
+         if ((!vbr || (constrained_vbr&&!isTransient))
+               && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
+         {
+            opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3);
+            offsets[i] = cap-tot_boost;
+            tot_boost = cap;
+            break;
+         } else {
+            offsets[i] = boost;
+            tot_boost += boost_bits;
+         }
+      }
+   }
+   *tot_boost_ = tot_boost;
+   RESTORE_STACK;
+   return maxDepth;
+}
+
+
+static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
+      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes)
+{
+   int c;
+   VARDECL(celt_sig, _pre);
+   celt_sig *pre[2];
+   const CELTMode *mode;
+   int pitch_index;
+   opus_val16 gain1;
+   opus_val16 pf_threshold;
+   int pf_on;
+   int qg;
+   SAVE_STACK;
+
+   mode = st->mode;
+   ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
+
+   pre[0] = _pre;
+   pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
+
+
+   c=0; do {
+      OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
+      OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
+   } while (++c<CC);
+
+   if (enabled)
+   {
+      VARDECL(opus_val16, pitch_buf);
+      ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
+
+      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
+      /* Don't search for the fir last 1.5 octave of the range because
+         there's too many false-positives due to short-term correlation */
+      pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
+            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index,
+            st->arch);
+      pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
+
+      gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
+            N, &pitch_index, st->prefilter_period, st->prefilter_gain);
+      if (pitch_index > COMBFILTER_MAXPERIOD-2)
+         pitch_index = COMBFILTER_MAXPERIOD-2;
+      gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+      /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
+      if (st->loss_rate>2)
+         gain1 = HALF32(gain1);
+      if (st->loss_rate>4)
+         gain1 = HALF32(gain1);
+      if (st->loss_rate>8)
+         gain1 = 0;
+   } else {
+      gain1 = 0;
+      pitch_index = COMBFILTER_MINPERIOD;
+   }
+
+   /* Gain threshold for enabling the prefilter/postfilter */
+   pf_threshold = QCONST16(.2f,15);
+
+   /* Adjusting the threshold based on rate and continuity */
+   if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
+      pf_threshold += QCONST16(.2f,15);
+   if (nbAvailableBytes<25)
+      pf_threshold += QCONST16(.1f,15);
+   if (nbAvailableBytes<35)
+      pf_threshold += QCONST16(.1f,15);
+   if (st->prefilter_gain > QCONST16(.4f,15))
+      pf_threshold -= QCONST16(.1f,15);
+   if (st->prefilter_gain > QCONST16(.55f,15))
+      pf_threshold -= QCONST16(.1f,15);
+
+   /* Hard threshold at 0.2 */
+   pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
+   if (gain1<pf_threshold)
+   {
+      gain1 = 0;
+      pf_on = 0;
+      qg = 0;
+   } else {
+      /*This block is not gated by a total bits check only because
+        of the nbAvailableBytes check above.*/
+      if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
+         gain1=st->prefilter_gain;
+
+#ifdef OPUS_FIXED_POINT
+      qg = ((gain1+1536)>>10)/3-1;
+#else
+      qg = (int)floor(.5f+gain1*32/3)-1;
+#endif
+      qg = IMAX(0, IMIN(7, qg));
+      gain1 = QCONST16(0.09375f,15)*(qg+1);
+      pf_on = 1;
+   }
+   /*printf("%d %f\n", pitch_index, gain1);*/
+
+   c=0; do {
+      int offset = mode->shortMdctSize-st->overlap;
+      st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+      OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
+      if (offset)
+         comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
+               st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
+               st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
+
+      comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
+            st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
+            st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap);
+      OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
+
+      if (N>COMBFILTER_MAXPERIOD)
+      {
+         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+      } else {
+         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
+         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+      }
+   } while (++c<CC);
+
+   RESTORE_STACK;
+   *gain = gain1;
+   *pitch = pitch_index;
+   *qgain = qg;
+   return pf_on;
+}
+
+static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target,
+      int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
+      int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
+      opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
+      int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking,
+      opus_val16 temporal_vbr)
+{
+   /* The target rate in 8th bits per frame */
+   opus_int32 target;
+   int coded_bins;
+   int coded_bands;
+   opus_val16 tf_calibration;
+   int nbEBands;
+   const opus_int16 *eBands;
+
+   nbEBands = mode->nbEBands;
+   eBands = mode->eBands;
+
+   coded_bands = lastCodedBands ? lastCodedBands : nbEBands;
+   coded_bins = eBands[coded_bands]<<LM;
+   if (C==2)
+      coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM;
+
+   target = base_target;
+
+   /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
+#ifndef DISABLE_FLOAT_API
+   if (analysis->valid && analysis->activity<.4)
+      target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
+#endif
+   /* Stereo savings */
+   if (C==2)
+   {
+      int coded_stereo_bands;
+      int coded_stereo_dof;
+      opus_val16 max_frac;
+      coded_stereo_bands = IMIN(intensity, coded_bands);
+      coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
+      /* Maximum fraction of the bits we can save if the signal is mono. */
+      max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
+      stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
+      /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
+      target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
+                      SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
+   }
+   /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
+   target += tot_boost-(16<<LM);
+   /* Apply transient boost, compensating for average boost. */
+   tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ?
+                    QCONST16(0.02f,14) : QCONST16(0.04f,14);
+   target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
+
+#ifndef DISABLE_FLOAT_API
+   /* Apply tonality boost */
+   if (analysis->valid && !lfe)
+   {
+      opus_int32 tonal_target;
+      float tonal;
+
+      /* Tonality boost (compensating for the average). */
+      tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f;
+      tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
+      if (pitch_change)
+         tonal_target +=  (opus_int32)((coded_bins<<BITRES)*.8f);
+      /*printf("%f %f ", analysis->tonality, tonal);*/
+      target = tonal_target;
+   }
+#endif
+
+   if (has_surround_mask&&!lfe)
+   {
+      opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);
+      /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
+      target = IMAX(target/4, surround_target);
+   }
+
+   {
+      opus_int32 floor_depth;
+      int bins;
+      bins = eBands[nbEBands-2]<<LM;
+      /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
+      floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
+      floor_depth = IMAX(floor_depth, target>>2);
+      target = IMIN(target, floor_depth);
+      /*printf("%f %d\n", maxDepth, floor_depth);*/
+   }
+
+   if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
+   {
+      opus_val16 rate_factor;
+#ifdef OPUS_FIXED_POINT
+      rate_factor = MAX16(0,(bitrate-32000));
+#else
+      rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
+#endif
+      if (constrained_vbr)
+         rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
+      target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);
+
+   }
+
+   if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
+   {
+      opus_val16 amount;
+      opus_val16 tvbr_factor;
+      amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));
+      tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT);
+      target += (opus_int32)MULT16_32_Q15(tvbr_factor, target);
+   }
+
+   /* Don't allow more than doubling the rate */
+   target = IMIN(2*base_target, target);
+
+   return target;
+}
+
+int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+{
+   int i, c, N;
+   opus_int32 bits;
+   ec_enc _enc;
+   VARDECL(celt_sig, in);
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_norm, X);
+   VARDECL(celt_ener, bandE);
+   VARDECL(opus_val16, bandLogE);
+   VARDECL(opus_val16, bandLogE2);
+   VARDECL(int, fine_quant);
+   VARDECL(opus_val16, error);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *prefilter_mem;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2;
+   int shortBlocks=0;
+   int isTransient=0;
+   const int CC = st->channels;
+   const int C = st->stream_channels;
+   int LM, M;
+   int tf_select;
+   int nbFilledBytes, nbAvailableBytes;
+   int effEnd;
+   int codedBands;
+   int tf_sum;
+   int alloc_trim;
+   int pitch_index=COMBFILTER_MINPERIOD;
+   opus_val16 gain1 = 0;
+   int dual_stereo=0;
+   int effectiveBytes;
+   int dynalloc_logp;
+   opus_int32 vbr_rate;
+   opus_int32 total_bits;
+   opus_int32 total_boost;
+   opus_int32 balance;
+   opus_int32 tell;
+   int prefilter_tapset=0;
+   int pf_on;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence=0;
+   int tf_chan = 0;
+   opus_val16 tf_estimate;
+   int pitch_change=0;
+   opus_int32 tot_boost;
+   opus_val32 sample_max;
+   opus_val16 maxDepth;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   int secondMdct;
+   int signalBandwidth;
+   int transient_got_disabled=0;
+   opus_val16 surround_masking=0;
+   opus_val16 temporal_vbr=0;
+   opus_val16 surround_trim = 0;
+   opus_int32 equiv_rate = 510000;
+   VARDECL(opus_val16, surround_dynalloc);
+   ALLOC_STACK;
+
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   tf_estimate = 0;
+   if (nbCompressedBytes<2 || pcm==NULL)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
+   frame_size *= st->upsample;
+   for (LM=0;LM<=mode->maxLM;LM++)
+      if (mode->shortMdctSize<<LM==frame_size)
+         break;
+   if (LM>mode->maxLM)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+   N = M*mode->shortMdctSize;
+
+   prefilter_mem = st->in_mem+CC*(st->overlap);
+   oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD));
+   oldLogE = oldBandE + CC*nbEBands;
+   oldLogE2 = oldLogE + CC*nbEBands;
+
+   if (enc==NULL)
+   {
+      tell=1;
+      nbFilledBytes=0;
+   } else {
+      tell=ec_tell(enc);
+      nbFilledBytes=(tell+4)>>3;
+   }
+
+#ifdef CUSTOM_MODES
+   if (st->signalling && enc==NULL)
+   {
+      int tmp = (mode->effEBands-st->end)>>1;
+      st->end = IMAX(1, mode->effEBands-tmp);
+      compressed[0] = tmp<<5;
+      compressed[0] |= LM<<3;
+      compressed[0] |= (C==2)<<2;
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         int c0 = toOpus(compressed[0]);
+         if (c0<0)
+         {
+            RESTORE_STACK;
+            return OPUS_BAD_ARG;
+         }
+         compressed[0] = c0;
+      }
+      compressed++;
+      nbCompressedBytes--;
+   }
+#else
+   celt_assert(st->signalling==0);
+#endif
+
+   /* Can't produce more than 1275 output bytes */
+   nbCompressedBytes = IMIN(nbCompressedBytes,1275);
+   nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
+
+   if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
+   {
+      opus_int32 den=mode->Fs>>BITRES;
+      vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
+#ifdef CUSTOM_MODES
+      if (st->signalling)
+         vbr_rate -= 8<<BITRES;
+#endif
+      effectiveBytes = vbr_rate>>(3+BITRES);
+   } else {
+      opus_int32 tmp;
+      vbr_rate = 0;
+      tmp = st->bitrate*frame_size;
+      if (tell>1)
+         tmp += tell;
+      if (st->bitrate!=OPUS_BITRATE_MAX)
+         nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
+               (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
+      effectiveBytes = nbCompressedBytes;
+   }
+   if (st->bitrate != OPUS_BITRATE_MAX)
+      equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50);
+
+   if (enc==NULL)
+   {
+      ec_enc_init(&_enc, compressed, nbCompressedBytes);
+      enc = &_enc;
+   }
+
+   if (vbr_rate>0)
+   {
+      /* Computes the max bit-rate allowed in VBR mode to avoid violating the
+          target rate and buffering.
+         We must do this up front so that bust-prevention logic triggers
+          correctly if we don't have enough bits. */
+      if (st->constrained_vbr)
+      {
+         opus_int32 vbr_bound;
+         opus_int32 max_allowed;
+         /* We could use any multiple of vbr_rate as bound (depending on the
+             delay).
+            This is clamped to ensure we use at least two bytes if the encoder
+             was entirely empty, but to allow 0 in hybrid mode. */
+         vbr_bound = vbr_rate;
+         max_allowed = IMIN(IMAX(tell==1?2:0,
+               (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
+               nbAvailableBytes);
+         if(max_allowed < nbAvailableBytes)
+         {
+            nbCompressedBytes = nbFilledBytes+max_allowed;
+            nbAvailableBytes = max_allowed;
+            ec_enc_shrink(enc, nbCompressedBytes);
+         }
+      }
+   }
+   total_bits = nbCompressedBytes*8;
+
+   effEnd = st->end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+
+   ALLOC(in, CC*(N+st->overlap), celt_sig);
+
+   sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
+   st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
+   sample_max=MAX32(sample_max, st->overlap_max);
+#ifdef OPUS_FIXED_POINT
+   silence = (sample_max==0);
+#else
+   silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
+#endif
+#ifdef FUZZING
+   if ((rand()&0x3F)==0)
+      silence = 1;
+#endif
+   if (tell==1)
+      ec_enc_bit_logp(enc, silence, 15);
+   else
+      silence=0;
+   if (silence)
+   {
+      /*In VBR mode there is no need to send more than the minimum. */
+      if (vbr_rate>0)
+      {
+         effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
+         total_bits=nbCompressedBytes*8;
+         nbAvailableBytes=2;
+         ec_enc_shrink(enc, nbCompressedBytes);
+      }
+      /* Pretend we've filled all the remaining bits with zeros
+            (that's what the initialiser did anyway) */
+      tell = nbCompressedBytes*8;
+      enc->nbits_total+=tell-ec_tell(enc);
+   }
+   c=0; do {
+      celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample,
+                  mode->preemph, st->preemph_memE+c, st->clip);
+   } while (++c<CC);
+
+
+
+   /* Find pitch period and gain */
+   {
+      int enabled;
+      int qg;
+      enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf
+            && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE);
+
+      prefilter_tapset = st->tapset_decision;
+      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
+      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
+            && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
+         pitch_change = 1;
+      if (pf_on==0)
+      {
+         if(st->start==0 && tell+16<=total_bits)
+            ec_enc_bit_logp(enc, 0, 1);
+      } else {
+         /*This block is not gated by a total bits check only because
+           of the nbAvailableBytes check above.*/
+         int octave;
+         ec_enc_bit_logp(enc, 1, 1);
+         pitch_index += 1;
+         octave = EC_ILOG(pitch_index)-5;
+         ec_enc_uint(enc, octave, 6);
+         ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
+         pitch_index -= 1;
+         ec_enc_bits(enc, qg, 3);
+         ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
+      }
+   }
+
+   isTransient = 0;
+   shortBlocks = 0;
+   if (st->complexity >= 1 && !st->lfe)
+   {
+      isTransient = transient_analysis(in, N+st->overlap, CC,
+            &tf_estimate, &tf_chan);
+   }
+   if (LM>0 && ec_tell(enc)+3<=total_bits)
+   {
+      if (isTransient)
+         shortBlocks = M;
+   } else {
+      isTransient = 0;
+      transient_got_disabled=1;
+   }
+
+   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
+   ALLOC(bandE,nbEBands*CC, celt_ener);
+   ALLOC(bandLogE,nbEBands*CC, opus_val16);
+
+   secondMdct = shortBlocks && st->complexity>=8;
+   ALLOC(bandLogE2, C*nbEBands, opus_val16);
+   if (secondMdct)
+   {
+      compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
+      compute_band_energies(mode, freq, bandE, effEnd, C, M);
+      amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C);
+      for (i=0;i<C*nbEBands;i++)
+         bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+   }
+
+   compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+   if (CC==2&&C==1)
+      tf_chan = 0;
+   compute_band_energies(mode, freq, bandE, effEnd, C, M);
+
+   if (st->lfe)
+   {
+      for (i=2;i<st->end;i++)
+      {
+         bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
+         bandE[i] = MAX32(bandE[i], EPSILON);
+      }
+   }
+   amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+
+   ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
+   for(i=0;i<st->end;i++)
+      surround_dynalloc[i] = 0;
+   /* This computes how much masking takes place between surround channels */
+   if (st->start==0&&st->energy_mask&&!st->lfe)
+   {
+      int mask_end;
+      int midband;
+      int count_dynalloc;
+      opus_val32 mask_avg=0;
+      opus_val32 diff=0;
+      int count=0;
+      mask_end = IMAX(2,st->lastCodedBands);
+      for (c=0;c<C;c++)
+      {
+         for(i=0;i<mask_end;i++)
+         {
+            opus_val16 mask;
+            mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
+                   QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+            if (mask > 0)
+               mask = HALF16(mask);
+            mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
+            count += eBands[i+1]-eBands[i];
+            diff += MULT16_16(mask, 1+2*i-mask_end);
+         }
+      }
+      mask_avg = DIV32_16(mask_avg,count);
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
+      /* Again, being conservative */
+      diff = HALF32(diff);
+      diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
+      /* Find the band that's in the middle of the coded spectrum */
+      for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
+      count_dynalloc=0;
+      for(i=0;i<mask_end;i++)
+      {
+         opus_val32 lin;
+         opus_val16 unmask;
+         lin = mask_avg + diff*(i-midband);
+         if (C==2)
+            unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
+         else
+            unmask = st->energy_mask[i];
+         unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
+         unmask -= lin;
+         if (unmask > QCONST16(.25f, DB_SHIFT))
+         {
+            surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
+            count_dynalloc++;
+         }
+      }
+      if (count_dynalloc>=3)
+      {
+         /* If we need dynalloc in many bands, it's probably because our
+            initial masking rate was too low. */
+         mask_avg += QCONST16(.25f, DB_SHIFT);
+         if (mask_avg>0)
+         {
+            /* Something went really wrong in the original calculations,
+               disabling masking. */
+            mask_avg = 0;
+            diff = 0;
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = 0;
+         } else {
+            for(i=0;i<mask_end;i++)
+               surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
+         }
+      }
+      mask_avg += QCONST16(.2f, DB_SHIFT);
+      /* Convert to 1/64th units used for the trim */
+      surround_trim = 64*diff;
+      /*printf("%d %d ", mask_avg, surround_trim);*/
+      surround_masking = mask_avg;
+   }
+   /* Temporal VBR (but not for LFE) */
+   if (!st->lfe)
+   {
+      opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
+      opus_val32 frame_avg=0;
+      opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+      for(i=st->start;i<st->end;i++)
+      {
+         follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
+         if (C==2)
+            follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
+         frame_avg += follow;
+      }
+      frame_avg /= (st->end-st->start);
+      temporal_vbr = SUB16(frame_avg,st->spec_avg);
+      temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr));
+      st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);
+   }
+   /*for (i=0;i<21;i++)
+      printf("%f ", bandLogE[i]);
+   printf("\n");*/
+
+   if (!secondMdct)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         bandLogE2[i] = bandLogE[i];
+   }
+
+   /* Last chance to catch any transient we might have missed in the
+      time-domain analysis */
+   if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe)
+   {
+      if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C))
+      {
+         isTransient = 1;
+         shortBlocks = M;
+         compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
+         compute_band_energies(mode, freq, bandE, effEnd, C, M);
+         amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+         /* Compensate for the scaling of short vs long mdcts */
+         for (i=0;i<C*nbEBands;i++)
+            bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+         tf_estimate = QCONST16(.2f,14);
+      }
+   }
+
+   if (LM>0 && ec_tell(enc)+3<=total_bits)
+      ec_enc_bit_logp(enc, isTransient, 3);
+
+   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
+
+   /* Band normalisation */
+   normalise_bands(mode, freq, X, bandE, effEnd, C, M);
+
+   ALLOC(tf_res, nbEBands, int);
+   /* Disable variable tf resolution for hybrid and at very low bitrate */
+   if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe)
+   {
+      int lambda;
+      if (effectiveBytes<40)
+         lambda = 12;
+      else if (effectiveBytes<60)
+         lambda = 6;
+      else if (effectiveBytes<100)
+         lambda = 4;
+      else
+         lambda = 3;
+      lambda*=2;
+      tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan);
+      for (i=effEnd;i<st->end;i++)
+         tf_res[i] = tf_res[effEnd-1];
+   } else {
+      tf_sum = 0;
+      for (i=0;i<st->end;i++)
+         tf_res[i] = isTransient;
+      tf_select=0;
+   }
+
+   ALLOC(error, C*nbEBands, opus_val16);
+   quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE,
+         oldBandE, total_bits, error, enc,
+         C, LM, nbAvailableBytes, st->force_intra,
+         &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe);
+
+   tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
+
+   if (ec_tell(enc)+4<=total_bits)
+   {
+      if (st->lfe)
+      {
+         st->tapset_decision = 0;
+         st->spread_decision = SPREAD_NORMAL;
+      } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0)
+      {
+         if (st->complexity == 0)
+            st->spread_decision = SPREAD_NONE;
+         else
+            st->spread_decision = SPREAD_NORMAL;
+      } else {
+         /* Disable new spreading+tapset estimator until we can show it works
+            better than the old one. So far it seems like spreading_decision()
+            works best. */
+#if 0
+         if (st->analysis.valid)
+         {
+            static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
+            static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
+            static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
+            static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
+            st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
+            st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
+         } else
+#endif
+         {
+            st->spread_decision = spreading_decision(mode, X,
+                  &st->tonal_average, st->spread_decision, &st->hf_average,
+                  &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+         }
+         /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
+         /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
+      }
+      ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
+   }
+
+   ALLOC(offsets, nbEBands, int);
+
+   maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets,
+         st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
+         eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc);
+   /* For LFE, everything interesting is in the first band */
+   if (st->lfe)
+      offsets[0] = IMIN(8, effectiveBytes/3);
+   ALLOC(cap, nbEBands, int);
+   init_caps(mode,cap,LM,C);
+
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   total_boost = 0;
+   tell = ec_tell_frac(enc);
+   for (i=st->start;i<st->end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      int j;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
+            && boost < cap[i]; j++)
+      {
+         int flag;
+         flag = j<offsets[i];
+         ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
+         tell = ec_tell_frac(enc);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_boost += quanta;
+         dynalloc_loop_logp = 1;
+      }
+      /* Making dynalloc more likely */
+      if (j)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+      offsets[i] = boost;
+   }
+
+   if (C==2)
+   {
+      static const opus_val16 intensity_thresholds[21]=
+      /* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19  20  off*/
+        {  1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134};
+      static const opus_val16 intensity_histeresis[21]=
+        {  1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6,  8, 8};
+
+      /* Always use MS for 2.5 ms frames until we can do a better analysis */
+      if (LM!=0)
+         dual_stereo = stereo_analysis(mode, X, LM, N);
+
+      st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
+            intensity_thresholds, intensity_histeresis, 21, st->intensity);
+      st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
+   }
+
+   alloc_trim = 5;
+   if (tell+(6<<BITRES) <= total_bits - total_boost)
+   {
+      if (st->lfe)
+         alloc_trim = 5;
+      else
+         alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
+            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim);
+      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
+      tell = ec_tell_frac(enc);
+   }
+
+   /* Variable bitrate */
+   if (vbr_rate>0)
+   {
+     opus_val16 alpha;
+     opus_int32 delta;
+     /* The target rate in 8th bits per frame */
+     opus_int32 target, base_target;
+     opus_int32 min_allowed;
+     int lm_diff = mode->maxLM - LM;
+
+     /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
+        The CELT allocator will just not be able to use more than that anyway. */
+     nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
+     base_target = vbr_rate - ((40*C+20)<<BITRES);
+
+     if (st->constrained_vbr)
+        base_target += (st->vbr_offset>>lm_diff);
+
+     target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
+           st->lastCodedBands, C, st->intensity, st->constrained_vbr,
+           st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
+           st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking,
+           temporal_vbr);
+
+     /* The current offset is removed from the target and the space used
+        so far is added*/
+     target=target+tell;
+     /* In VBR mode the frame size must not be reduced so much that it would
+         result in the encoder running out of bits.
+        The margin of 2 bytes ensures that none of the bust-prevention logic
+         in the decoder will have triggered so far. */
+     min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
+
+     nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
+     nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
+     nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
+
+     /* By how much did we "miss" the target on that frame */
+     delta = target - vbr_rate;
+
+     target=nbAvailableBytes<<(BITRES+3);
+
+     /*If the frame is silent we don't adjust our drift, otherwise
+       the encoder will shoot to very high rates after hitting a
+       span of silence, but we do allow the bitres to refill.
+       This means that we'll undershoot our target in CVBR/VBR modes
+       on files with lots of silence. */
+     if(silence)
+     {
+       nbAvailableBytes = 2;
+       target = 2*8<<BITRES;
+       delta = 0;
+     }
+
+     if (st->vbr_count < 970)
+     {
+        st->vbr_count++;
+        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
+     } else
+        alpha = QCONST16(.001f,15);
+     /* How many bits have we used in excess of what we're allowed */
+     if (st->constrained_vbr)
+        st->vbr_reservoir += target - vbr_rate;
+     /*printf ("%d\n", st->vbr_reservoir);*/
+
+     /* Compute the offset we need to apply in order to reach the target */
+     if (st->constrained_vbr)
+     {
+        st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
+        st->vbr_offset = -st->vbr_drift;
+     }
+     /*printf ("%d\n", st->vbr_drift);*/
+
+     if (st->constrained_vbr && st->vbr_reservoir < 0)
+     {
+        /* We're under the min value -- increase rate */
+        int adjust = (-st->vbr_reservoir)/(8<<BITRES);
+        /* Unless we're just coding silence */
+        nbAvailableBytes += silence?0:adjust;
+        st->vbr_reservoir = 0;
+        /*printf ("+%d\n", adjust);*/
+     }
+     nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
+     /*printf("%d\n", nbCompressedBytes*50*8);*/
+     /* This moves the raw bits to take into account the new compressed size */
+     ec_enc_shrink(enc, nbCompressedBytes);
+   }
+
+   /* Bit allocation */
+   ALLOC(fine_quant, nbEBands, int);
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+
+   /* bits =           packet size                    - where we are - safety*/
+   bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+   signalBandwidth = st->end-1;
+#ifndef DISABLE_FLOAT_API
+   if (st->analysis.valid)
+   {
+      int min_bandwidth;
+      if (equiv_rate < (opus_int32)32000*C)
+         min_bandwidth = 13;
+      else if (equiv_rate < (opus_int32)48000*C)
+         min_bandwidth = 16;
+      else if (equiv_rate < (opus_int32)60000*C)
+         min_bandwidth = 18;
+      else  if (equiv_rate < (opus_int32)80000*C)
+         min_bandwidth = 19;
+      else
+         min_bandwidth = 20;
+      signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth);
+   }
+#endif
+   if (st->lfe)
+      signalBandwidth = 1;
+   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+         alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
+   if (st->lastCodedBands)
+      st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands));
+   else
+      st->lastCodedBands = codedBands;
+
+   quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
+
+   /* Residual quantisation */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+   quant_all_bands(1, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
+         nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
+
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = st->consec_transient<2;
+#ifdef FUZZING
+      anti_collapse_on = rand()&0x1;
+#endif
+      ec_enc_bits(enc, anti_collapse_on, 1);
+   }
+   quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
+
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+   }
+
+#ifdef RESYNTH
+   /* Re-synthesis of the coded audio if required */
+   {
+      celt_sig *out_mem[2];
+
+      if (anti_collapse_on)
+      {
+         anti_collapse(mode, X, collapse_masks, LM, C, N,
+               st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+      }
+
+      if (silence)
+      {
+         for (i=0;i<C*N;i++)
+            freq[i] = 0;
+      } else {
+         /* Synthesis */
+         denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+      }
+
+      c=0; do {
+         OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
+      } while (++c<CC);
+
+      if (CC==2&&C==1)
+      {
+         for (i=0;i<N;i++)
+            freq[N+i] = freq[i];
+      }
+
+      c=0; do {
+         out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
+      } while (++c<CC);
+
+      compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM);
+
+      c=0; do {
+         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+         st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
+         comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
+               st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
+               mode->window, st->overlap);
+         if (LM!=0)
+            comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
+                  st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
+                  mode->window, overlap);
+      } while (++c<CC);
+
+      /* We reuse freq[] as scratch space for the de-emphasis */
+      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq);
+      st->prefilter_period_old = st->prefilter_period;
+      st->prefilter_gain_old = st->prefilter_gain;
+      st->prefilter_tapset_old = st->prefilter_tapset;
+   }
+#endif
+
+   st->prefilter_period = pitch_index;
+   st->prefilter_gain = gain1;
+   st->prefilter_tapset = prefilter_tapset;
+#ifdef RESYNTH
+   if (LM!=0)
+   {
+      st->prefilter_period_old = st->prefilter_period;
+      st->prefilter_gain_old = st->prefilter_gain;
+      st->prefilter_tapset_old = st->prefilter_tapset;
+   }
+#endif
+
+   if (CC==2&&C==1) {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[nbEBands+i]=oldBandE[i];
+   }
+
+   if (!isTransient)
+   {
+      for (i=0;i<CC*nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<CC*nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+   } else {
+      for (i=0;i<CC*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   /* In case start or end were to change */
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=st->end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<CC);
+
+   if (isTransient || transient_got_disabled)
+      st->consec_transient++;
+   else
+      st->consec_transient=0;
+   st->rng = enc->rng;
+
+   /* If there's any room left (can only happen for very high rates),
+      it's already filled with zeros */
+   ec_enc_done(enc);
+
+#ifdef CUSTOM_MODES
+   if (st->signalling)
+      nbCompressedBytes++;
+#endif
+
+   RESTORE_STACK;
+   if (ec_get_error(enc))
+      return OPUS_INTERNAL_ERROR;
+   else
+      return nbCompressedBytes;
+}
+
+
+#ifdef CUSTOM_MODES
+
+#ifdef OPUS_FIXED_POINT
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   int j, ret, C, N;
+   VARDECL(opus_int16, in);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+   ALLOC(in, C*N, opus_int16);
+
+   for (j=0;j<C*N;j++)
+     in[j] = FLOAT2INT16(pcm[j]);
+
+   ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((float*)pcm)[j]=in[j]*(1.f/32768.f);
+#endif
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+#else
+
+int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   int j, ret, C, N;
+   VARDECL(celt_sig, in);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C=st->channels;
+   N=frame_size;
+   ALLOC(in, C*N, celt_sig);
+   for (j=0;j<C*N;j++) {
+     in[j] = SCALEOUT(pcm[j]);
+   }
+
+   ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]);
+#endif
+   RESTORE_STACK;
+   return ret;
+}
+
+int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
+{
+   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
+}
+
+#endif
+
+#endif /* CUSTOM_MODES */
+
+int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
+{
+   va_list ap;
+
+   va_start(ap, request);
+   switch (request)
+   {
+      case OPUS_SET_COMPLEXITY_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>10)
+            goto bad_arg;
+         st->complexity = value;
+      }
+      break;
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands)
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands)
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_PREDICTION_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>2)
+            goto bad_arg;
+         st->disable_pf = value<=1;
+         st->force_intra = value==0;
+      }
+      break;
+      case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+      {
+         int value = va_arg(ap, opus_int32);
+         if (value<0 || value>100)
+            goto bad_arg;
+         st->loss_rate = value;
+      }
+      break;
+      case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->constrained_vbr = value;
+      }
+      break;
+      case OPUS_SET_VBR_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->vbr = value;
+      }
+      break;
+      case OPUS_SET_BITRATE_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<=500 && value!=OPUS_BITRATE_MAX)
+            goto bad_arg;
+         value = IMIN(value, 260000*st->channels);
+         st->bitrate = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
+      case OPUS_SET_LSB_DEPTH_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          if (value<8 || value>24)
+             goto bad_arg;
+          st->lsb_depth=value;
+      }
+      break;
+      case OPUS_GET_LSB_DEPTH_REQUEST:
+      {
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          *value=st->lsb_depth;
+      }
+      break;
+      case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          st->variable_duration = value;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *oldBandE, *oldLogE, *oldLogE2;
+         oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD));
+         oldLogE = oldBandE + st->channels*st->mode->nbEBands;
+         oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
+               opus_custom_encoder_get_size(st->mode, st->channels)-
+               ((char*)&st->ENCODER_RESET_START - (char*)st));
+         for (i=0;i<st->channels*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+         st->vbr_offset = 0;
+         st->delayedIntra = 1;
+         st->spread_decision = SPREAD_NORMAL;
+         st->tonal_average = 256;
+         st->hf_average = 0;
+         st->tapset_decision = 0;
+      }
+      break;
+#ifdef CUSTOM_MODES
+      case CELT_SET_INPUT_CLIPPING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->clip = value;
+      }
+      break;
+#endif
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value;
+      }
+      break;
+      case CELT_SET_ANALYSIS_REQUEST:
+      {
+         AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
+         if (info)
+            OPUS_COPY(&st->analysis, info, 1);
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      case OPUS_SET_LFE_REQUEST:
+      {
+          opus_int32 value = va_arg(ap, opus_int32);
+          st->lfe = value;
+      }
+      break;
+      case OPUS_SET_ENERGY_MASK_REQUEST:
+      {
+          opus_val16 *value = va_arg(ap, opus_val16*);
+          st->energy_mask = value;
+      }
+      break;
+      default:
+         goto bad_request;
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+   va_end(ap);
+   return OPUS_UNIMPLEMENTED;
+}
diff --git a/drivers/opus/celt/celt_lpc.c b/drivers/opus/celt/celt_lpc.c
new file mode 100644
index 0000000000..1fa4406bc9
--- /dev/null
+++ b/drivers/opus/celt/celt_lpc.c
@@ -0,0 +1,309 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "celt_lpc.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "pitch.h"
+
+void _celt_lpc(
+      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
+const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
+int          p
+)
+{
+   int i, j;
+   opus_val32 r;
+   opus_val32 error = ac[0];
+#ifdef OPUS_FIXED_POINT
+   opus_val32 lpc[LPC_ORDER];
+#else
+   float *lpc = _lpc;
+#endif
+
+   for (i = 0; i < p; i++)
+      lpc[i] = 0;
+   if (ac[0] != 0)
+   {
+      for (i = 0; i < p; i++) {
+         /* Sum up this iteration's reflection coefficient */
+         opus_val32 rr = 0;
+         for (j = 0; j < i; j++)
+            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
+         rr += SHR32(ac[i + 1],3);
+         r = -frac_div32(SHL32(rr,3), error);
+         /*  Update LPC coefficients and total error */
+         lpc[i] = SHR32(r,3);
+         for (j = 0; j < (i+1)>>1; j++)
+         {
+            opus_val32 tmp1, tmp2;
+            tmp1 = lpc[j];
+            tmp2 = lpc[i-1-j];
+            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
+            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
+         }
+
+         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
+         /* Bail out once we get 30 dB gain */
+#ifdef OPUS_FIXED_POINT
+         if (error<SHR32(ac[0],10))
+            break;
+#else
+         if (error<.001f*ac[0])
+            break;
+#endif
+      }
+   }
+#ifdef OPUS_FIXED_POINT
+   for (i=0;i<p;i++)
+      _lpc[i] = ROUND16(lpc[i],16);
+#endif
+}
+
+void celt_fir(const opus_val16 *_x,
+         const opus_val16 *num,
+         opus_val16 *_y,
+         int N,
+         int ord,
+         opus_val16 *mem)
+{
+   int i,j;
+   VARDECL(opus_val16, rnum);
+   VARDECL(opus_val16, x);
+   SAVE_STACK;
+
+   ALLOC(rnum, ord, opus_val16);
+   ALLOC(x, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for(i=0;i<ord;i++)
+      x[i] = mem[ord-i-1];
+   for (i=0;i<N;i++)
+      x[i+ord]=_x[i];
+   for(i=0;i<ord;i++)
+      mem[i] = _x[N-i-1];
+#ifdef SMALL_FOOTPRINT
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
+      for (j=0;j<ord;j++)
+      {
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      }
+      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
+   }
+#else
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(rnum, x+i, sum, ord);
+      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
+      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
+      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
+      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+#endif
+   RESTORE_STACK;
+}
+
+void celt_iir(const opus_val32 *_x,
+         const opus_val16 *den,
+         opus_val32 *_y,
+         int N,
+         int ord,
+         opus_val16 *mem)
+{
+#ifdef SMALL_FOOTPRINT
+   int i,j;
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+      {
+         sum -= MULT16_16(den[j],mem[j]);
+      }
+      for (j=ord-1;j>=1;j--)
+      {
+         mem[j]=mem[j-1];
+      }
+      mem[0] = ROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+#else
+   int i,j;
+   VARDECL(opus_val16, rden);
+   VARDECL(opus_val16, y);
+   SAVE_STACK;
+
+   celt_assert((ord&3)==0);
+   ALLOC(rden, ord, opus_val16);
+   ALLOC(y, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1];
+   for(;i<N+ord;i++)
+      y[i]=0;
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord);
+
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum -= MULT16_16(rden[j],y[i+j]);
+      y[i+ord] = ROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = _y[N-i-1];
+   RESTORE_STACK;
+#endif
+}
+
+int _celt_autocorr(
+                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
+                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
+                   const opus_val16       *window,
+                   int          overlap,
+                   int          lag,
+                   int          n,
+                   int          arch
+                  )
+{
+   opus_val32 d;
+   int i, k;
+   int fastN=n-lag;
+   int shift;
+   const opus_val16 *xptr;
+   VARDECL(opus_val16, xx);
+   SAVE_STACK;
+   ALLOC(xx, n, opus_val16);
+   celt_assert(n>0);
+   celt_assert(overlap>=0);
+   if (overlap == 0)
+   {
+      xptr = x;
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++)
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
+   }
+   shift=0;
+#ifdef OPUS_FIXED_POINT
+   {
+      opus_val32 ac0;
+      ac0 = 1+(n<<7);
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
+      for(i=(n&1);i<n;i+=2)
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
+
+      shift = celt_ilog2(ac0)-30+10;
+      shift = (shift)/2;
+      if (shift>0)
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0;
+   }
+#endif
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
+   for (k=0;k<=lag;k++)
+   {
+      for (i = k+fastN, d = 0; i < n; i++)
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
+      ac[k] += d;
+   }
+#ifdef OPUS_FIXED_POINT
+   shift = 2*shift;
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456)
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912)
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
+
+   RESTORE_STACK;
+   return shift;
+}
diff --git a/drivers/opus/celt/celt_lpc.h b/drivers/opus/celt/celt_lpc.h
new file mode 100644
index 0000000000..dc2a0a3d26
--- /dev/null
+++ b/drivers/opus/celt/celt_lpc.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2009-2010 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PLC_H
+#define PLC_H
+
+#include "arch.h"
+
+#define LPC_ORDER 24
+
+void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
+
+void celt_fir(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         int ord,
+         opus_val16 *mem);
+
+void celt_iir(const opus_val32 *x,
+         const opus_val16 *den,
+         opus_val32 *y,
+         int N,
+         int ord,
+         opus_val16 *mem);
+
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
+         const opus_val16 *window, int overlap, int lag, int n, int arch);
+
+#endif /* PLC_H */
diff --git a/drivers/opus/celt/cpu_support.h b/drivers/opus/celt/cpu_support.h
new file mode 100644
index 0000000000..d68dbe62c5
--- /dev/null
+++ b/drivers/opus/celt/cpu_support.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
+#include "arm/armcpu.h"
+
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ */
+#define OPUS_ARCHMASK 3
+
+#else
+#define OPUS_ARCHMASK 0
+
+static OPUS_INLINE int opus_select_arch(void)
+{
+  return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/opus/celt/cwrs.c b/drivers/opus/celt/cwrs.c
new file mode 100644
index 0000000000..b866aa9210
--- /dev/null
+++ b/drivers/opus/celt/cwrs.c
@@ -0,0 +1,697 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2007-2009 Timothy B. Terriberry
+   Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "os_support.h"
+#include "cwrs.h"
+#include "mathops.h"
+#include "arch.h"
+
+#ifdef CUSTOM_MODES
+
+/*Guaranteed to return a conservatively large estimate of the binary logarithm
+   with frac bits of fractional precision.
+  Tested for all possible 32-bit inputs with frac=4, where the maximum
+   overestimation is 0.06254243 bits.*/
+int log2_frac(opus_uint32 val, int frac)
+{
+  int l;
+  l=EC_ILOG(val);
+  if(val&(val-1)){
+    /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
+       before the shift would cause overflow (e.g., for 0xFFFFxxxx).
+       Doesn't work for val=0, but that case fails the test above.*/
+    if(l>16)val=((val-1)>>(l-16))+1;
+    else val<<=16-l;
+    l=(l-1)<<frac;
+    /*Note that we always need one iteration, since the rounding up above means
+       that we might need to adjust the integer part of the logarithm.*/
+    do{
+      int b;
+      b=(int)(val>>16);
+      l+=b<<frac;
+      val=(val+b)>>b;
+      val=(val*val+0x7FFF)>>15;
+    }
+    while(frac-->0);
+    /*If val is not exactly 0x8000, then we have to round up the remainder.*/
+    return l+(val>0x8000);
+  }
+  /*Exact powers of two require no rounding.*/
+  else return (l-1)<<frac;
+}
+#endif
+
+/*Although derived separately, the pulse vector coding scheme is equivalent to
+   a Pyramid Vector Quantizer \cite{Fis86}.
+  Some additional notes about an early version appear at
+   http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
+   and the definitions of some terms have evolved since that was written.
+
+  The conversion from a pulse vector to an integer index (encoding) and back
+   (decoding) is governed by two related functions, V(N,K) and U(N,K).
+
+  V(N,K) = the number of combinations, with replacement, of N items, taken K
+   at a time, when a sign bit is added to each item taken at least once (i.e.,
+   the number of N-dimensional unit pulse vectors with K pulses).
+  One way to compute this is via
+    V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
+   where choose() is the binomial function.
+  A table of values for N<10 and K<10 looks like:
+  V[10][10] = {
+    {1,  0,   0,    0,    0,     0,     0,      0,      0,       0},
+    {1,  2,   2,    2,    2,     2,     2,      2,      2,       2},
+    {1,  4,   8,   12,   16,    20,    24,     28,     32,      36},
+    {1,  6,  18,   38,   66,   102,   146,    198,    258,     326},
+    {1,  8,  32,   88,  192,   360,   608,    952,   1408,    1992},
+    {1, 10,  50,  170,  450,  1002,  1970,   3530,   5890,    9290},
+    {1, 12,  72,  292,  912,  2364,  5336,  10836,  20256,   35436},
+    {1, 14,  98,  462, 1666,  4942, 12642,  28814,  59906,  115598},
+    {1, 16, 128,  688, 2816,  9424, 27008,  68464, 157184,  332688},
+    {1, 18, 162,  978, 4482, 16722, 53154, 148626, 374274,  864146}
+  };
+
+  U(N,K) = the number of such combinations wherein N-1 objects are taken at
+   most K-1 at a time.
+  This is given by
+    U(N,K) = sum(k=0...K-1,V(N-1,k))
+           = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
+  The latter expression also makes clear that U(N,K) is half the number of such
+   combinations wherein the first object is taken at least once.
+  Although it may not be clear from either of these definitions, U(N,K) is the
+   natural function to work with when enumerating the pulse vector codebooks,
+   not V(N,K).
+  U(N,K) is not well-defined for N=0, but with the extension
+    U(0,K) = K>0 ? 0 : 1,
+   the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
+  U[10][10] = {
+    {1, 0,  0,   0,    0,    0,     0,     0,      0,      0},
+    {0, 1,  1,   1,    1,    1,     1,     1,      1,      1},
+    {0, 1,  3,   5,    7,    9,    11,    13,     15,     17},
+    {0, 1,  5,  13,   25,   41,    61,    85,    113,    145},
+    {0, 1,  7,  25,   63,  129,   231,   377,    575,    833},
+    {0, 1,  9,  41,  129,  321,   681,  1289,   2241,   3649},
+    {0, 1, 11,  61,  231,  681,  1683,  3653,   7183,  13073},
+    {0, 1, 13,  85,  377, 1289,  3653,  8989,  19825,  40081},
+    {0, 1, 15, 113,  575, 2241,  7183, 19825,  48639, 108545},
+    {0, 1, 17, 145,  833, 3649, 13073, 40081, 108545, 265729}
+  };
+
+  With this extension, V(N,K) may be written in terms of U(N,K):
+    V(N,K) = U(N,K) + U(N,K+1)
+   for all N>=0, K>=0.
+  Thus U(N,K+1) represents the number of combinations where the first element
+   is positive or zero, and U(N,K) represents the number of combinations where
+   it is negative.
+  With a large enough table of U(N,K) values, we could write O(N) encoding
+   and O(min(N*log(K),N+K)) decoding routines, but such a table would be
+   prohibitively large for small embedded devices (K may be as large as 32767
+   for small N, and N may be as large as 200).
+
+  Both functions obey the same recurrence relation:
+    V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
+    U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
+   for all N>0, K>0, with different initial conditions at N=0 or K=0.
+  This allows us to construct a row of one of the tables above given the
+   previous row or the next row.
+  Thus we can derive O(NK) encoding and decoding routines with O(K) memory
+   using only addition and subtraction.
+
+  When encoding, we build up from the U(2,K) row and work our way forwards.
+  When decoding, we need to start at the U(N,K) row and work our way backwards,
+   which requires a means of computing U(N,K).
+  U(N,K) may be computed from two previous values with the same N:
+    U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
+   for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
+   previous values with the same K:
+    U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
+   for all K>1.
+  This allows us to construct an arbitrary row of the U(N,K) table by starting
+   with the first two values, which are constants.
+  This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
+   multiplications.
+  Similar relations can be derived for V(N,K), but are not used here.
+
+  For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
+   polynomial for fixed N.
+  The first few are
+    U(1,K) = 1,
+    U(2,K) = 2*K-1,
+    U(3,K) = (2*K-2)*K+1,
+    U(4,K) = (((4*K-6)*K+8)*K-3)/3,
+    U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
+   and
+    V(1,K) = 2,
+    V(2,K) = 4*K,
+    V(3,K) = 4*K*K+2,
+    V(4,K) = 8*(K*K+2)*K/3,
+    V(5,K) = ((4*K*K+20)*K*K+6)/3,
+   for all K>0.
+  This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
+   small N (and indeed decoding is also O(N) for N<3).
+
+  @ARTICLE{Fis86,
+    author="Thomas R. Fischer",
+    title="A Pyramid Vector Quantizer",
+    journal="IEEE Transactions on Information Theory",
+    volume="IT-32",
+    number=4,
+    pages="568--583",
+    month=Jul,
+    year=1986
+  }*/
+
+#if !defined(SMALL_FOOTPRINT)
+
+/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/
+# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)])
+/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N
+   with K pulses allocated to it.*/
+# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1))
+
+/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)).
+  Thus, the number of entries in row I is the larger of the maximum number of
+   pulses we will ever allocate for a given N=I (K=128, or however many fit in
+   32 bits, whichever is smaller), plus one, and the maximum N for which
+   K=I-1 pulses fit in 32 bits.
+  The largest band size in an Opus Custom mode is 208.
+  Otherwise, we can limit things to the set of N which can be achieved by
+   splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48,
+   44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/
+#if defined(CUSTOM_MODES)
+static const opus_uint32 CELT_PVQ_U_DATA[1488]={
+#else
+static const opus_uint32 CELT_PVQ_U_DATA[1272]={
+#endif
+  /*N=0, K=0...176:*/
+  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,
+#endif
+  /*N=1, K=1...176:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1,
+#endif
+  /*N=2, K=2...176:*/
+  3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+  43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+  81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
+  115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
+  145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
+  175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
+  205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
+  235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
+  265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
+  295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
+  325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381,
+  383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411,
+  413, 415,
+#endif
+  /*N=3, K=3...176:*/
+  13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
+  685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
+  1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
+  3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
+  6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
+  9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
+  13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
+  17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
+  21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
+  26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
+  31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
+  37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
+  43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
+  50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
+  57461, 58141, 58825, 59513, 60205, 60901, 61601,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565,
+  70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013,
+  78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113,
+#endif
+  /*N=4, K=4...176:*/
+  63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
+  7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
+  30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
+  82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
+  161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
+  267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
+  410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
+  597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
+  833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
+  1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
+  1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
+  1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
+  2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
+  2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
+  3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
+  3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
+  4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
+  5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
+  6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
+  7085049, 7207551,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783,
+  8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967,
+  9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199,
+  10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177,
+  11912575,
+#endif
+  /*N=5, K=5...176:*/
+  321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
+  50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
+  330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
+  1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
+  2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
+  4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
+  8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
+  13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
+  20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
+  29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
+  40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
+  55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
+  73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
+  95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
+  122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
+  155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
+  193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
+  238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
+  290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
+  351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
+  420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
+  500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
+  590359041, 604167209, 618216201, 632508801,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241,
+  755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161,
+  878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329,
+  1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041,
+  1143412929, 1166053121, 1189027881, 1212340489, 1235994241,
+#endif
+  /*N=6, K=6...96:*/
+  1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
+  335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
+  2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
+  11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
+  29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
+  64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
+  128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
+  235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
+  402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
+  655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
+  1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
+  1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
+  2011371957, 2120032959,
+#if defined(CUSTOM_MODES)
+  /*...109:*/
+  2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U,
+  3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U,
+  4012305913U,
+#endif
+  /*N=7, K=7...54*/
+  8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
+  1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
+  19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
+  88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
+  292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
+  793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
+  1667010073, 1870535785, 2094367717,
+#if defined(CUSTOM_MODES)
+  /*...60:*/
+  2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U,
+#endif
+  /*N=8, K=8...37*/
+  48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
+  9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
+  104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
+  638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
+  2229491905U,
+#if defined(CUSTOM_MODES)
+  /*...40:*/
+  2691463695U, 3233240945U, 3866006015U,
+#endif
+  /*N=9, K=9...28:*/
+  265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
+  39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
+  628496897, 872893441, 1196924561, 1621925137, 2173806145U,
+#if defined(CUSTOM_MODES)
+  /*...29:*/
+  2883810113U,
+#endif
+  /*N=10, K=10...24:*/
+  1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
+  254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U,
+  3375210671U,
+  /*N=11, K=11...19:*/
+  8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
+  948062325, 1616336765,
+#if defined(CUSTOM_MODES)
+  /*...20:*/
+  2684641785U,
+#endif
+  /*N=12, K=12...18:*/
+  45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
+  3248227095U,
+  /*N=13, K=13...16:*/
+  251595969, 579168825, 1267854873, 2653649025U,
+  /*N=14, K=14:*/
+  1409933619
+};
+
+#if defined(CUSTOM_MODES)
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
+  CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
+  CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
+  CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
+  CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
+};
+#else
+static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
+  CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
+  CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
+  CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
+  CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
+};
+#endif
+
+#if defined(CUSTOM_MODES)
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac);
+}
+#endif
+
+static opus_uint32 icwrs(int _n,const int *_y){
+  opus_uint32 i;
+  int         j;
+  int         k;
+  celt_assert(_n>=2);
+  j=_n-1;
+  i=_y[j]<0;
+  k=abs(_y[j]);
+  do{
+    j--;
+    i+=CELT_PVQ_U(_n-j,k);
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1);
+  }
+  while(j>0);
+  return i;
+}
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
+  celt_assert(_k>0);
+  ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
+}
+
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
+  opus_uint32 p;
+  int         s;
+  int         k0;
+  celt_assert(_k>0);
+  celt_assert(_n>1);
+  while(_n>2){
+    opus_uint32 q;
+    /*Lots of pulses case:*/
+    if(_k>=_n){
+      const opus_uint32 *row;
+      row=CELT_PVQ_U_ROW[_n];
+      /*Are the pulses in this dimension negative?*/
+      p=row[_k+1];
+      s=-(_i>=p);
+      _i-=p&s;
+      /*Count how many pulses were placed in this dimension.*/
+      k0=_k;
+      q=row[_n];
+      if(q>_i){
+        celt_assert(p>q);
+        _k=_n;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+      }
+      else for(p=row[_k];p>_i;p=row[_k])_k--;
+      _i-=p;
+      *_y++=(k0-_k+s)^s;
+    }
+    /*Lots of dimensions case:*/
+    else{
+      /*Are there any pulses in this dimension at all?*/
+      p=CELT_PVQ_U_ROW[_k][_n];
+      q=CELT_PVQ_U_ROW[_k+1][_n];
+      if(p<=_i&&_i<q){
+        _i-=p;
+        *_y++=0;
+      }
+      else{
+        /*Are the pulses in this dimension negative?*/
+        s=-(_i>=q);
+        _i-=q&s;
+        /*Count how many pulses were placed in this dimension.*/
+        k0=_k;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+        _i-=p;
+        *_y++=(k0-_k+s)^s;
+      }
+    }
+    _n--;
+  }
+  /*_n==2*/
+  p=2*_k+1;
+  s=-(_i>=p);
+  _i-=p&s;
+  k0=_k;
+  _k=(_i+1)>>1;
+  if(_k)_i-=2*_k-1;
+  *_y++=(k0-_k+s)^s;
+  /*_n==1*/
+  s=-(int)_i;
+  *_y=(_k+s)^s;
+}
+
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+  cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
+}
+
+#else /* SMALL_FOOTPRINT */
+
+/*Computes the next row/column of any recurrence that obeys the relation
+   u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
+  _ui0 is the base case for the new row/column.*/
+static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
+  opus_uint32 ui1;
+  unsigned      j;
+  /*This do-while will overrun the array if we don't have storage for at least
+     2 values.*/
+  j=1; do {
+    ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
+    _ui[j-1]=_ui0;
+    _ui0=ui1;
+  } while (++j<_len);
+  _ui[j-1]=_ui0;
+}
+
+/*Computes the previous row/column of any recurrence that obeys the relation
+   u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
+  _ui0 is the base case for the new row/column.*/
+static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
+  opus_uint32 ui1;
+  unsigned      j;
+  /*This do-while will overrun the array if we don't have storage for at least
+     2 values.*/
+  j=1; do {
+    ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
+    _ui[j-1]=_ui0;
+    _ui0=ui1;
+  } while (++j<_n);
+  _ui[j-1]=_ui0;
+}
+
+/*Compute V(_n,_k), as well as U(_n,0..._k+1).
+  _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
+static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
+  opus_uint32 um2;
+  unsigned      len;
+  unsigned      k;
+  len=_k+2;
+  /*We require storage at least 3 values (e.g., _k>0).*/
+  celt_assert(len>=3);
+  _u[0]=0;
+  _u[1]=um2=1;
+  /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+  /*If _n==1, _u[i] should be 1 for i>1.*/
+  celt_assert(_n>=2);
+  /*If _k==0, the following do-while loop will overflow the buffer.*/
+  celt_assert(_k>0);
+  k=2;
+  do _u[k]=(k<<1)-1;
+  while(++k<len);
+  for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
+  return _u[_k]+_u[_k+1];
+}
+
+/*Returns the _i'th combination of _k elements chosen from a set of size _n
+   with associated sign bits.
+  _y: Returns the vector of pulses.
+  _u: Must contain entries [0..._k+1] of row _n of U() on input.
+      Its contents will be destructively modified.*/
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
+  int j;
+  celt_assert(_n>0);
+  j=0;
+  do{
+    opus_uint32 p;
+    int           s;
+    int           yj;
+    p=_u[_k+1];
+    s=-(_i>=p);
+    _i-=p&s;
+    yj=_k;
+    p=_u[_k];
+    while(p>_i)p=_u[--_k];
+    _i-=p;
+    yj-=_k;
+    _y[j]=(yj+s)^s;
+    uprev(_u,_k+2,0);
+  }
+  while(++j<_n);
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+   of size 1 with associated sign bits.
+  _y: The vector of pulses, whose sum of absolute values is K.
+  _k: Returns K.*/
+static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){
+  *_k=abs(_y[0]);
+  return _y[0]<0;
+}
+
+/*Returns the index of the given combination of K elements chosen from a set
+   of size _n with associated sign bits.
+  _y:  The vector of pulses, whose sum of absolute values must be _k.
+  _nc: Returns V(_n,_k).*/
+static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
+ opus_uint32 *_u){
+  opus_uint32 i;
+  int         j;
+  int         k;
+  /*We can't unroll the first two iterations of the loop unless _n>=2.*/
+  celt_assert(_n>=2);
+  _u[0]=0;
+  for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
+  i=icwrs1(_y+_n-1,&k);
+  j=_n-2;
+  i+=_u[k];
+  k+=abs(_y[j]);
+  if(_y[j]<0)i+=_u[k+1];
+  while(j-->0){
+    unext(_u,_k+2,0);
+    i+=_u[k];
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=_u[k+1];
+  }
+  *_nc=_u[k]+_u[k+1];
+  return i;
+}
+
+#ifdef CUSTOM_MODES
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  if (_n==1)
+  {
+    for (k=1;k<=_maxk;k++)
+      _bits[k] = 1<<_frac;
+  }
+  else {
+    VARDECL(opus_uint32,u);
+    SAVE_STACK;
+    ALLOC(u,_maxk+2U,opus_uint32);
+    ncwrs_urow(_n,_maxk,u);
+    for(k=1;k<=_maxk;k++)
+      _bits[k]=log2_frac(u[k]+u[k+1],_frac);
+    RESTORE_STACK;
+  }
+}
+#endif /* CUSTOM_MODES */
+
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
+  opus_uint32 i;
+  VARDECL(opus_uint32,u);
+  opus_uint32 nc;
+  SAVE_STACK;
+  celt_assert(_k>0);
+  ALLOC(u,_k+2U,opus_uint32);
+  i=icwrs(_n,_k,&nc,_y,u);
+  ec_enc_uint(_enc,i,nc);
+  RESTORE_STACK;
+}
+
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+  VARDECL(opus_uint32,u);
+  SAVE_STACK;
+  celt_assert(_k>0);
+  ALLOC(u,_k+2U,opus_uint32);
+  cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+  RESTORE_STACK;
+}
+
+#endif /* SMALL_FOOTPRINT */
diff --git a/drivers/opus/celt/cwrs.h b/drivers/opus/celt/cwrs.h
new file mode 100644
index 0000000000..7dfbd076d1
--- /dev/null
+++ b/drivers/opus/celt/cwrs.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2007-2009 Timothy B. Terriberry
+   Written by Timothy B. Terriberry and Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CWRS_H
+#define CWRS_H
+
+#include "arch.h"
+#include "stack_alloc.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef CUSTOM_MODES
+int log2_frac(opus_uint32 val, int frac);
+#endif
+
+void get_required_bits(opus_int16 *bits, int N, int K, int frac);
+
+void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
+
+void decode_pulses(int *_y, int N, int K, ec_dec *dec);
+
+#endif /* CWRS_H */
diff --git a/drivers/opus/celt/ecintrin.h b/drivers/opus/celt/ecintrin.h
new file mode 100644
index 0000000000..2263cff6bd
--- /dev/null
+++ b/drivers/opus/celt/ecintrin.h
@@ -0,0 +1,87 @@
+/* Copyright (c) 2003-2008 Timothy B. Terriberry
+   Copyright (c) 2008 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include "opus_types.h"
+#include <math.h>
+#include <limits.h>
+#include "arch.h"
+#if !defined(_ecintrin_H)
+# define _ecintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+   given an appropriate architecture, but the branchless bit-twiddling versions
+   are just as fast, and do not require any special target architecture.
+  Earlier gcc versions (3.x) compiled both code to the same assembly
+   instructions, because of the way they represented ((_b)>(_a)) internally.*/
+# define EC_MINI(_a,_b)      ((_a)+(((_b)-(_a))&-((_b)<(_a))))
+
+/*Count leading zeros.
+  This macro should only be used for implementing ec_ilog(), if it is defined.
+  All other code should use EC_ILOG() instead.*/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+# include <intrin.h>
+/*In _DEBUG mode this is not an intrinsic by default.*/
+# pragma intrinsic(_BitScanReverse)
+
+static __inline int ec_bsr(unsigned long _x){
+  unsigned long ret;
+  _BitScanReverse(&ret,_x);
+  return (int)ret;
+}
+# define EC_CLZ0    (1)
+# define EC_CLZ(_x) (-ec_bsr(_x))
+#elif defined(ENABLE_TI_DSPLIB)
+# include "dsplib.h"
+# define EC_CLZ0    (31)
+# define EC_CLZ(_x) (_lnorm(_x))
+#elif __GNUC_PREREQ(3,4)
+# if INT_MAX>=2147483647
+#  define EC_CLZ0    ((int)sizeof(unsigned)*CHAR_BIT)
+#  define EC_CLZ(_x) (__builtin_clz(_x))
+# elif LONG_MAX>=2147483647L
+#  define EC_CLZ0    ((int)sizeof(unsigned long)*CHAR_BIT)
+#  define EC_CLZ(_x) (__builtin_clzl(_x))
+# endif
+#endif
+
+#if defined(EC_CLZ)
+/*Note that __builtin_clz is not defined when _x==0, according to the gcc
+   documentation (and that of the BSR instruction that implements it on x86).
+  The majority of the time we can never pass it zero.
+  When we need to, it can be special cased.*/
+# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x))
+#else
+int ec_ilog(opus_uint32 _v);
+# define EC_ILOG(_x) (ec_ilog(_x))
+#endif
+#endif
diff --git a/drivers/opus/celt/entcode.c b/drivers/opus/celt/entcode.c
new file mode 100644
index 0000000000..fd817a9db5
--- /dev/null
+++ b/drivers/opus/celt/entcode.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "entcode.h"
+#include "arch.h"
+
+#if !defined(EC_CLZ)
+/*This is a fallback for systems where we don't know how to access
+   a BSR or CLZ instruction (see ecintrin.h).
+  If you are optimizing Opus on a new platform and it has a native CLZ or
+   BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
+   an easy performance win.*/
+int ec_ilog(opus_uint32 _v){
+  /*On a Pentium M, this branchless version tested as the fastest on
+     1,000,000,000 random 32-bit integers, edging out a similar version with
+     branches, and a 256-entry LUT version.*/
+  int ret;
+  int m;
+  ret=!!_v;
+  m=!!(_v&0xFFFF0000)<<4;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xFF00)<<3;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xF0)<<2;
+  _v>>=m;
+  ret|=m;
+  m=!!(_v&0xC)<<1;
+  _v>>=m;
+  ret|=m;
+  ret+=!!(_v&0x2);
+  return ret;
+}
+#endif
+
+opus_uint32 ec_tell_frac(ec_ctx *_this){
+  opus_uint32 nbits;
+  opus_uint32 r;
+  int         l;
+  int         i;
+  /*To handle the non-integral number of bits still left in the encoder/decoder
+     state, we compute the worst-case number of bits of val that must be
+     encoded to ensure that the value is inside the range for any possible
+     subsequent bits.
+    The computation here is independent of val itself (the decoder does not
+     even track that value), even though the real number of bits used after
+     ec_enc_done() may be 1 smaller if rng is a power of two and the
+     corresponding trailing bits of val are all zeros.
+    If we did try to track that special case, then coding a value with a
+     probability of 1/(1<<n) might sometimes appear to use more than n bits.
+    This may help explain the surprising result that a newly initialized
+     encoder or decoder claims to have used 1 bit.*/
+  nbits=_this->nbits_total<<BITRES;
+  l=EC_ILOG(_this->rng);
+  r=_this->rng>>(l-16);
+  for(i=BITRES;i-->0;){
+    int b;
+    r=r*r>>15;
+    b=(int)(r>>16);
+    l=l<<1|b;
+    r>>=b;
+  }
+  return nbits-l;
+}
diff --git a/drivers/opus/celt/entcode.h b/drivers/opus/celt/entcode.h
new file mode 100644
index 0000000000..dd13e49e50
--- /dev/null
+++ b/drivers/opus/celt/entcode.h
@@ -0,0 +1,117 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if !defined(_entcode_H)
+# define _entcode_H (1)
+# include <limits.h>
+# include <stddef.h>
+# include "ecintrin.h"
+
+/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
+   larger type, you can speed up the decoder by using it here.*/
+typedef opus_uint32           ec_window;
+typedef struct ec_ctx         ec_ctx;
+typedef struct ec_ctx         ec_enc;
+typedef struct ec_ctx         ec_dec;
+
+# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT)
+
+/*The number of bits to use for the range-coded part of unsigned integers.*/
+# define EC_UINT_BITS   (8)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+   3 => 1/8th bits.*/
+# define BITRES 3
+
+/*The entropy encoder/decoder context.
+  We use the same structure for both, so that common functions like ec_tell()
+   can be used on either one.*/
+struct ec_ctx{
+   /*Buffered input/output.*/
+   unsigned char *buf;
+   /*The size of the buffer.*/
+   opus_uint32    storage;
+   /*The offset at which the last byte containing raw bits was read/written.*/
+   opus_uint32    end_offs;
+   /*Bits that will be read from/written at the end.*/
+   ec_window      end_window;
+   /*Number of valid bits in end_window.*/
+   int            nend_bits;
+   /*The total number of whole bits read/written.
+     This does not include partial bits currently in the range coder.*/
+   int            nbits_total;
+   /*The offset at which the next range coder byte will be read/written.*/
+   opus_uint32    offs;
+   /*The number of values in the current range.*/
+   opus_uint32    rng;
+   /*In the decoder: the difference between the top of the current range and
+      the input value, minus one.
+     In the encoder: the low end of the current range.*/
+   opus_uint32    val;
+   /*In the decoder: the saved normalization factor from ec_decode().
+     In the encoder: the number of oustanding carry propagating symbols.*/
+   opus_uint32    ext;
+   /*A buffered input/output symbol, awaiting carry propagation.*/
+   int            rem;
+   /*Nonzero if an error occurred.*/
+   int            error;
+};
+
+static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){
+  return _this->offs;
+}
+
+static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){
+  return _this->buf;
+}
+
+static OPUS_INLINE int ec_get_error(ec_ctx *_this){
+  return _this->error;
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+static OPUS_INLINE int ec_tell(ec_ctx *_this){
+  return _this->nbits_total-EC_ILOG(_this->rng);
+}
+
+/*Returns the number of bits "used" by the encoded or decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits scaled by 2**BITRES.
+          This will always be slightly larger than the exact value (e.g., all
+           rounding error is in the positive direction).*/
+opus_uint32 ec_tell_frac(ec_ctx *_this);
+
+#endif
diff --git a/drivers/opus/celt/entdec.c b/drivers/opus/celt/entdec.c
new file mode 100644
index 0000000000..383da571c9
--- /dev/null
+++ b/drivers/opus/celt/entdec.c
@@ -0,0 +1,245 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stddef.h>
+#include "os_support.h"
+#include "arch.h"
+#include "entdec.h"
+#include "mfrngcod.h"
+
+/*A range decoder.
+  This is an entropy decoder based upon \cite{Mar79}, which is itself a
+   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
+  It is very similar to arithmetic encoding, except that encoding is done with
+   digits in any base, instead of with bits, and so it is faster when using
+   larger bases (i.e.: a byte).
+  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
+   is the base, longer than the theoretical optimum, but to my knowledge there
+   is no published justification for this claim.
+  This only seems true when using near-infinite precision arithmetic so that
+   the process is carried out with no rounding errors.
+
+  An excellent description of implementation details is available at
+   http://www.arturocampos.com/ac_range.html
+  A recent work \cite{MNW98} which proposes several changes to arithmetic
+   encoding for efficiency actually re-discovers many of the principles
+   behind range encoding, and presents a good theoretical analysis of them.
+
+  End of stream is handled by writing out the smallest number of bits that
+   ensures that the stream will be correctly decoded regardless of the value of
+   any subsequent bits.
+  ec_tell() can be used to determine how many bits were needed to decode
+   all the symbols thus far; other data can be packed in the remaining bits of
+   the input buffer.
+  @PHDTHESIS{Pas76,
+    author="Richard Clark Pasco",
+    title="Source coding algorithms for fast data compression",
+    school="Dept. of Electrical Engineering, Stanford University",
+    address="Stanford, CA",
+    month=May,
+    year=1976
+  }
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video & Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf"
+  }*/
+
+static int ec_read_byte(ec_dec *_this){
+  return _this->offs<_this->storage?_this->buf[_this->offs++]:0;
+}
+
+static int ec_read_byte_from_end(ec_dec *_this){
+  return _this->end_offs<_this->storage?
+   _this->buf[_this->storage-++(_this->end_offs)]:0;
+}
+
+/*Normalizes the contents of val and rng so that rng lies entirely in the
+   high-order symbol.*/
+static void ec_dec_normalize(ec_dec *_this){
+  /*If the range is too small, rescale it and input some bits.*/
+  while(_this->rng<=EC_CODE_BOT){
+    int sym;
+    _this->nbits_total+=EC_SYM_BITS;
+    _this->rng<<=EC_SYM_BITS;
+    /*Use up the remaining bits from our last symbol.*/
+    sym=_this->rem;
+    /*Read the next value from the input.*/
+    _this->rem=ec_read_byte(_this);
+    /*Take the rest of the bits we need from this new symbol.*/
+    sym=(sym<<EC_SYM_BITS|_this->rem)>>(EC_SYM_BITS-EC_CODE_EXTRA);
+    /*And subtract them from val, capped to be less than EC_CODE_TOP.*/
+    _this->val=((_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym))&(EC_CODE_TOP-1);
+  }
+}
+
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){
+  _this->buf=_buf;
+  _this->storage=_storage;
+  _this->end_offs=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_tell() will subtract partial bits.
+    The final value after the ec_dec_normalize() call will be the same as in
+     the encoder, but we have to compensate for the bits that are added there.*/
+  _this->nbits_total=EC_CODE_BITS+1
+   -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS;
+  _this->offs=0;
+  _this->rng=1U<<EC_CODE_EXTRA;
+  _this->rem=ec_read_byte(_this);
+  _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA));
+  _this->error=0;
+  /*Normalize the interval.*/
+  ec_dec_normalize(_this);
+}
+
+unsigned ec_decode(ec_dec *_this,unsigned _ft){
+  unsigned s;
+  _this->ext=_this->rng/_ft;
+  s=(unsigned)(_this->val/_this->ext);
+  return _ft-EC_MINI(s+1,_ft);
+}
+
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){
+   unsigned s;
+   _this->ext=_this->rng>>_bits;
+   s=(unsigned)(_this->val/_this->ext);
+   return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits);
+}
+
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){
+  opus_uint32 s;
+  s=IMUL32(_this->ext,_ft-_fh);
+  _this->val-=s;
+  _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s;
+  ec_dec_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){
+  opus_uint32 r;
+  opus_uint32 d;
+  opus_uint32 s;
+  int         ret;
+  r=_this->rng;
+  d=_this->val;
+  s=r>>_logp;
+  ret=d<s;
+  if(!ret)_this->val=d-s;
+  _this->rng=ret?s:r-s;
+  ec_dec_normalize(_this);
+  return ret;
+}
+
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  opus_uint32 d;
+  opus_uint32 s;
+  opus_uint32 t;
+  int         ret;
+  s=_this->rng;
+  d=_this->val;
+  r=s>>_ftb;
+  ret=-1;
+  do{
+    t=s;
+    s=IMUL32(r,_icdf[++ret]);
+  }
+  while(d<s);
+  _this->val=d-s;
+  _this->rng=t-s;
+  ec_dec_normalize(_this);
+  return ret;
+}
+
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){
+  unsigned ft;
+  unsigned s;
+  int      ftb;
+  /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+  celt_assert(_ft>1);
+  _ft--;
+  ftb=EC_ILOG(_ft);
+  if(ftb>EC_UINT_BITS){
+    opus_uint32 t;
+    ftb-=EC_UINT_BITS;
+    ft=(unsigned)(_ft>>ftb)+1;
+    s=ec_decode(_this,ft);
+    ec_dec_update(_this,s,s+1,ft);
+    t=(opus_uint32)s<<ftb|ec_dec_bits(_this,ftb);
+    if(t<=_ft)return t;
+    _this->error=1;
+    return _ft;
+  }
+  else{
+    _ft++;
+    s=ec_decode(_this,(unsigned)_ft);
+    ec_dec_update(_this,s,s+1,(unsigned)_ft);
+    return s;
+  }
+}
+
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){
+  ec_window   window;
+  int         available;
+  opus_uint32 ret;
+  window=_this->end_window;
+  available=_this->nend_bits;
+  if((unsigned)available<_bits){
+    do{
+      window|=(ec_window)ec_read_byte_from_end(_this)<<available;
+      available+=EC_SYM_BITS;
+    }
+    while(available<=EC_WINDOW_SIZE-EC_SYM_BITS);
+  }
+  ret=(opus_uint32)window&(((opus_uint32)1<<_bits)-1U);
+  window>>=_bits;
+  available-=_bits;
+  _this->end_window=window;
+  _this->nend_bits=available;
+  _this->nbits_total+=_bits;
+  return ret;
+}
diff --git a/drivers/opus/celt/entdec.h b/drivers/opus/celt/entdec.h
new file mode 100644
index 0000000000..d8ab318730
--- /dev/null
+++ b/drivers/opus/celt/entdec.h
@@ -0,0 +1,100 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entdec_H)
+# define _entdec_H (1)
+# include <limits.h>
+# include "entcode.h"
+
+/*Initializes the decoder.
+  _buf: The input buffer to use.
+  Return: 0 on success, or a negative value on error.*/
+void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage);
+
+/*Calculates the cumulative frequency for the next symbol.
+  This can then be fed into the probability model to determine what that
+   symbol is, and the additional frequency information required to advance to
+   the next symbol.
+  This function cannot be called more than once without a corresponding call to
+   ec_dec_update(), or decoding will not proceed correctly.
+  _ft: The total frequency of the symbols in the alphabet the next symbol was
+        encoded with.
+  Return: A cumulative frequency representing the encoded symbol.
+          If the cumulative frequency of all the symbols before the one that
+           was encoded was fl, and the cumulative frequency of all the symbols
+           up to and including the one encoded is fh, then the returned value
+           will fall in the range [fl,fh).*/
+unsigned ec_decode(ec_dec *_this,unsigned _ft);
+
+/*Equivalent to ec_decode() with _ft==1<<_bits.*/
+unsigned ec_decode_bin(ec_dec *_this,unsigned _bits);
+
+/*Advance the decoder past the next symbol using the frequency information the
+   symbol was encoded with.
+  Exactly one call to ec_decode() must have been made so that all necessary
+   intermediate calculations are performed.
+  _fl:  The cumulative frequency of all symbols that come before the symbol
+         decoded.
+  _fh:  The cumulative frequency of all symbols up to and including the symbol
+         decoded.
+        Together with _fl, this defines the range [_fl,_fh) in which the value
+         returned above must fall.
+  _ft:  The total frequency of the symbols in the alphabet the symbol decoded
+         was encoded in.
+        This must be the same as passed to the preceding call to ec_decode().*/
+void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/* Decode a bit that has a 1/(1<<_logp) probability of being a one */
+int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
+
+/*Decodes a symbol given an "inverse" CDF table.
+  No call to ec_dec_update() is necessary after this call.
+  _icdf: The "inverse" CDF, such that symbol s falls in the range
+          [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.
+  Return: The decoded symbol s.*/
+int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
+
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+  The bits must have been encoded with ec_enc_uint().
+  No call to ec_dec_update() is necessary after this call.
+  _ft: The number of integers that can be decoded (one more than the max).
+       This must be at least one, and no more than 2**32-1.
+  Return: The decoded bits.*/
+opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft);
+
+/*Extracts a sequence of raw bits from the stream.
+  The bits must have been encoded with ec_enc_bits().
+  No call to ec_dec_update() is necessary after this call.
+  _ftb: The number of bits to extract.
+        This must be between 0 and 25, inclusive.
+  Return: The decoded bits.*/
+opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb);
+
+#endif
diff --git a/drivers/opus/celt/entenc.c b/drivers/opus/celt/entenc.c
new file mode 100644
index 0000000000..299329c63f
--- /dev/null
+++ b/drivers/opus/celt/entenc.c
@@ -0,0 +1,294 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(OPUS_HAVE_CONFIG_H)
+# include "opus_config.h"
+#endif
+#include "os_support.h"
+#include "arch.h"
+#include "entenc.h"
+#include "mfrngcod.h"
+
+/*A range encoder.
+  See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
+
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video \& Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
+  }*/
+
+static int ec_write_byte(ec_enc *_this,unsigned _value){
+  if(_this->offs+_this->end_offs>=_this->storage)return -1;
+  _this->buf[_this->offs++]=(unsigned char)_value;
+  return 0;
+}
+
+static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){
+  if(_this->offs+_this->end_offs>=_this->storage)return -1;
+  _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value;
+  return 0;
+}
+
+/*Outputs a symbol, with a carry bit.
+  If there is a potential to propagate a carry over several symbols, they are
+   buffered until it can be determined whether or not an actual carry will
+   occur.
+  If the counter for the buffered symbols overflows, then the stream becomes
+   undecodable.
+  This gives a theoretical limit of a few billion symbols in a single packet on
+   32-bit systems.
+  The alternative is to truncate the range in order to force a carry, but
+   requires similar carry tracking in the decoder, needlessly slowing it down.*/
+static void ec_enc_carry_out(ec_enc *_this,int _c){
+  if(_c!=EC_SYM_MAX){
+    /*No further carry propagation possible, flush buffer.*/
+    int carry;
+    carry=_c>>EC_SYM_BITS;
+    /*Don't output a byte on the first write.
+      This compare should be taken care of by branch-prediction thereafter.*/
+    if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry);
+    if(_this->ext>0){
+      unsigned sym;
+      sym=(EC_SYM_MAX+carry)&EC_SYM_MAX;
+      do _this->error|=ec_write_byte(_this,sym);
+      while(--(_this->ext)>0);
+    }
+    _this->rem=_c&EC_SYM_MAX;
+  }
+  else _this->ext++;
+}
+
+static void ec_enc_normalize(ec_enc *_this){
+  /*If the range is too small, output some bits and rescale it.*/
+  while(_this->rng<=EC_CODE_BOT){
+    ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT));
+    /*Move the next-to-high-order symbol into the high-order position.*/
+    _this->val=(_this->val<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+    _this->rng<<=EC_SYM_BITS;
+    _this->nbits_total+=EC_SYM_BITS;
+  }
+}
+
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){
+  _this->buf=_buf;
+  _this->end_offs=0;
+  _this->end_window=0;
+  _this->nend_bits=0;
+  /*This is the offset from which ec_tell() will subtract partial bits.*/
+  _this->nbits_total=EC_CODE_BITS+1;
+  _this->offs=0;
+  _this->rng=EC_CODE_TOP;
+  _this->rem=-1;
+  _this->val=0;
+  _this->ext=0;
+  _this->storage=_size;
+  _this->error=0;
+}
+
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
+  opus_uint32 r;
+  r=_this->rng/_ft;
+  if(_fl>0){
+    _this->val+=_this->rng-IMUL32(r,(_ft-_fl));
+    _this->rng=IMUL32(r,(_fh-_fl));
+  }
+  else _this->rng-=IMUL32(r,(_ft-_fh));
+  ec_enc_normalize(_this);
+}
+
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){
+  opus_uint32 r;
+  r=_this->rng>>_bits;
+  if(_fl>0){
+    _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl));
+    _this->rng=IMUL32(r,(_fh-_fl));
+  }
+  else _this->rng-=IMUL32(r,((1U<<_bits)-_fh));
+  ec_enc_normalize(_this);
+}
+
+/*The probability of having a "one" is 1/(1<<_logp).*/
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){
+  opus_uint32 r;
+  opus_uint32 s;
+  opus_uint32 l;
+  r=_this->rng;
+  l=_this->val;
+  s=r>>_logp;
+  r-=s;
+  if(_val)_this->val=l+r;
+  _this->rng=_val?s:r;
+  ec_enc_normalize(_this);
+}
+
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){
+  opus_uint32 r;
+  r=_this->rng>>_ftb;
+  if(_s>0){
+    _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]);
+    _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]);
+  }
+  else _this->rng-=IMUL32(r,_icdf[_s]);
+  ec_enc_normalize(_this);
+}
+
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){
+  unsigned  ft;
+  unsigned  fl;
+  int       ftb;
+  /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/
+  celt_assert(_ft>1);
+  _ft--;
+  ftb=EC_ILOG(_ft);
+  if(ftb>EC_UINT_BITS){
+    ftb-=EC_UINT_BITS;
+    ft=(_ft>>ftb)+1;
+    fl=(unsigned)(_fl>>ftb);
+    ec_encode(_this,fl,fl+1,ft);
+    ec_enc_bits(_this,_fl&(((opus_uint32)1<<ftb)-1U),ftb);
+  }
+  else ec_encode(_this,_fl,_fl+1,_ft+1);
+}
+
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _bits){
+  ec_window window;
+  int       used;
+  window=_this->end_window;
+  used=_this->nend_bits;
+  celt_assert(_bits>0);
+  if(used+_bits>EC_WINDOW_SIZE){
+    do{
+      _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX);
+      window>>=EC_SYM_BITS;
+      used-=EC_SYM_BITS;
+    }
+    while(used>=EC_SYM_BITS);
+  }
+  window|=(ec_window)_fl<<used;
+  used+=_bits;
+  _this->end_window=window;
+  _this->nend_bits=used;
+  _this->nbits_total+=_bits;
+}
+
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){
+  int      shift;
+  unsigned mask;
+  celt_assert(_nbits<=EC_SYM_BITS);
+  shift=EC_SYM_BITS-_nbits;
+  mask=((1<<_nbits)-1)<<shift;
+  if(_this->offs>0){
+    /*The first byte has been finalized.*/
+    _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<<shift);
+  }
+  else if(_this->rem>=0){
+    /*The first byte is still awaiting carry propagation.*/
+    _this->rem=(_this->rem&~mask)|_val<<shift;
+  }
+  else if(_this->rng<=(EC_CODE_TOP>>_nbits)){
+    /*The renormalization loop has never been run.*/
+    _this->val=(_this->val&~((opus_uint32)mask<<EC_CODE_SHIFT))|
+     (opus_uint32)_val<<(EC_CODE_SHIFT+shift);
+  }
+  /*The encoder hasn't even encoded _nbits of data yet.*/
+  else _this->error=-1;
+}
+
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){
+  celt_assert(_this->offs+_this->end_offs<=_size);
+  OPUS_MOVE(_this->buf+_size-_this->end_offs,
+   _this->buf+_this->storage-_this->end_offs,_this->end_offs);
+  _this->storage=_size;
+}
+
+void ec_enc_done(ec_enc *_this){
+  ec_window   window;
+  int         used;
+  opus_uint32 msk;
+  opus_uint32 end;
+  int         l;
+  /*We output the minimum number of bits that ensures that the symbols encoded
+     thus far will be decoded correctly regardless of the bits that follow.*/
+  l=EC_CODE_BITS-EC_ILOG(_this->rng);
+  msk=(EC_CODE_TOP-1)>>l;
+  end=(_this->val+msk)&~msk;
+  if((end|msk)>=_this->val+_this->rng){
+    l++;
+    msk>>=1;
+    end=(_this->val+msk)&~msk;
+  }
+  while(l>0){
+    ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT));
+    end=(end<<EC_SYM_BITS)&(EC_CODE_TOP-1);
+    l-=EC_SYM_BITS;
+  }
+  /*If we have a buffered byte flush it into the output buffer.*/
+  if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0);
+  /*If we have buffered extra bits, flush them as well.*/
+  window=_this->end_window;
+  used=_this->nend_bits;
+  while(used>=EC_SYM_BITS){
+    _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX);
+    window>>=EC_SYM_BITS;
+    used-=EC_SYM_BITS;
+  }
+  /*Clear any excess space and add any remaining extra bits to the last byte.*/
+  if(!_this->error){
+    OPUS_CLEAR(_this->buf+_this->offs,
+     _this->storage-_this->offs-_this->end_offs);
+    if(used>0){
+      /*If there's no range coder data at all, give up.*/
+      if(_this->end_offs>=_this->storage)_this->error=-1;
+      else{
+        l=-l;
+        /*If we've busted, don't add too many extra bits to the last byte; it
+           would corrupt the range coder data, and that's more important.*/
+        if(_this->offs+_this->end_offs>=_this->storage&&l<used){
+          window&=(1<<l)-1;
+          _this->error=-1;
+        }
+        _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window;
+      }
+    }
+  }
+}
diff --git a/drivers/opus/celt/entenc.h b/drivers/opus/celt/entenc.h
new file mode 100644
index 0000000000..796bc4d572
--- /dev/null
+++ b/drivers/opus/celt/entenc.h
@@ -0,0 +1,110 @@
+/* Copyright (c) 2001-2011 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_entenc_H)
+# define _entenc_H (1)
+# include <stddef.h>
+# include "entcode.h"
+
+/*Initializes the encoder.
+  _buf:  The buffer to store output bytes in.
+  _size: The size of the buffer, in chars.*/
+void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size);
+/*Encodes a symbol given its frequency information.
+  The frequency information must be discernable by the decoder, assuming it
+   has read only the previous symbols from the stream.
+  It is allowable to change the frequency information, or even the entire
+   source alphabet, so long as the decoder can tell from the context of the
+   previously encoded information that it is supposed to do so as well.
+  _fl: The cumulative frequency of all symbols that come before the one to be
+        encoded.
+  _fh: The cumulative frequency of all symbols up to and including the one to
+        be encoded.
+       Together with _fl, this defines the range [_fl,_fh) in which the
+        decoded value will fall.
+  _ft: The sum of the frequencies of all the symbols*/
+void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft);
+
+/*Equivalent to ec_encode() with _ft==1<<_bits.*/
+void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits);
+
+/* Encode a bit that has a 1/(1<<_logp) probability of being a one */
+void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
+
+/*Encodes a symbol given an "inverse" CDF table.
+  _s:    The index of the symbol to encode.
+  _icdf: The "inverse" CDF, such that symbol _s falls in the range
+          [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
+         The values must be monotonically non-increasing, and the last value
+          must be 0.
+  _ftb: The number of bits of precision in the cumulative distribution.*/
+void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
+
+/*Encodes a raw unsigned integer in the stream.
+  _fl: The integer to encode.
+  _ft: The number of integers that can be encoded (one more than the max).
+       This must be at least one, and no more than 2**32-1.*/
+void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft);
+
+/*Encodes a sequence of raw bits in the stream.
+  _fl:  The bits to encode.
+  _ftb: The number of bits to encode.
+        This must be between 1 and 25, inclusive.*/
+void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb);
+
+/*Overwrites a few bits at the very start of an existing stream, after they
+   have already been encoded.
+  This makes it possible to have a few flags up front, where it is easy for
+   decoders to access them without parsing the whole stream, even if their
+   values are not determined until late in the encoding process, without having
+   to buffer all the intermediate symbols in the encoder.
+  In order for this to work, at least _nbits bits must have already been
+   encoded using probabilities that are an exact power of two.
+  The encoder can verify the number of encoded bits is sufficient, but cannot
+   check this latter condition.
+  _val:   The bits to encode (in the least _nbits significant bits).
+          They will be decoded in order from most-significant to least.
+  _nbits: The number of bits to overwrite.
+          This must be no more than 8.*/
+void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits);
+
+/*Compacts the data to fit in the target size.
+  This moves up the raw bits at the end of the current buffer so they are at
+   the end of the new buffer size.
+  The caller must ensure that the amount of data that's already been written
+   will fit in the new size.
+  _size: The number of bytes in the new buffer.
+         This must be large enough to contain the bits already written, and
+          must be no larger than the existing size.*/
+void ec_enc_shrink(ec_enc *_this,opus_uint32 _size);
+
+/*Indicates that there are no more symbols to encode.
+  All reamining output bytes are flushed to the output buffer.
+  ec_enc_init() must be called before the encoder can be used again.*/
+void ec_enc_done(ec_enc *_this);
+
+#endif
diff --git a/drivers/opus/celt/fixed_debug.h b/drivers/opus/celt/fixed_debug.h
new file mode 100644
index 0000000000..80bc94910f
--- /dev/null
+++ b/drivers/opus/celt/fixed_debug.h
@@ -0,0 +1,773 @@
+/* Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2012 Xiph.Org Foundation */
+/**
+   @file fixed_debug.h
+   @brief Fixed-point operations with debugging
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_DEBUG_H
+#define FIXED_DEBUG_H
+
+#include <stdio.h>
+#include "opus_defines.h"
+
+#ifdef CELT_C
+OPUS_EXPORT opus_int64 celt_mips=0;
+#else
+extern opus_int64 celt_mips;
+#endif
+
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16)
+
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
+#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
+#define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1))
+
+#define SHR(a,b) SHR32(a,b)
+#define PSHR(a,b) PSHR32(a,b)
+
+static OPUS_INLINE short NEG16(int x)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "NEG16: input is not short: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = -x;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "NEG16: output is not short: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+static OPUS_INLINE int NEG32(opus_int64 x)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(x))
+   {
+      fprintf (stderr, "NEG16: input is not int: %d\n", (int)x);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = -x;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "NEG16: output is not int: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__)
+static OPUS_INLINE short EXTRACT16_(int x, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = x;
+   celt_mips++;
+   return res;
+}
+
+#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__)
+static OPUS_INLINE int EXTEND32_(int x, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(x))
+   {
+      fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = x;
+   celt_mips++;
+   return res;
+}
+
+#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a>>shift;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a<<shift;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+static OPUS_INLINE int SHR32(opus_int64 a, int shift)
+{
+   opus_int64  res;
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a>>shift;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SHR32: output is not int: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__)
+static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
+{
+   opus_int64  res;
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
+   {
+      fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a<<shift;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SHL32: output is not int: %lld<<%d = %lld in %s: line %d\n", a, shift, res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift))
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
+#define HALF16(x)  (SHR16(x,1))
+#define HALF32(x)  (SHR32(x,1))
+
+//#define SHR(a,shift) ((a) >> (shift))
+//#define SHL(a,shift) ((a) << (shift))
+
+#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short SUB16_(int a, int b, char *file, int line)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#undef UADD32
+#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+   opus_uint64 res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a+b;
+   if (!VERIFY_UINT(res))
+   {
+      fprintf (stderr, "UADD32: output is not uint32: %llu in %s: line %d\n", res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#undef USUB32
+#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
+{
+   opus_uint64 res;
+   if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
+   {
+      fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (a<b)
+   {
+      fprintf (stderr, "USUB32: inputs underflow: %llu < %llu in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a-b;
+   if (!VERIFY_UINT(res))
+   {
+      fprintf (stderr, "USUB32: output is not uint32: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+/* result fits in 16 bits */
+static OPUS_INLINE short MULT16_16_16(int a, int b)
+{
+   int res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a*b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_16: output is not short: %d\n", res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips++;
+   return res;
+}
+
+#define MAC16_16(c,a,b)     (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))
+
+#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (ABS32(b)>=((opus_val32)(1)<<(15+Q)))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = (((opus_int64)a)*(opus_int64)b) >> Q;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (Q==15)
+      celt_mips+=3;
+   else
+      celt_mips+=4;
+   return res;
+}
+
+#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__)
+static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (ABS32(b)>=((opus_int64)(1)<<(15+Q)))
+   {
+      fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<<Q)>>1))>> Q;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   if (Q==15)
+      celt_mips+=4;
+   else
+      celt_mips+=5;
+   return res;
+}
+
+#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
+#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
+
+static OPUS_INLINE int SATURATE(int a, int b)
+{
+   if (a>b)
+      a=b;
+   if (a<-b)
+      a = -b;
+   celt_mips+=3;
+   return a;
+}
+
+static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a)
+{
+   celt_mips+=3;
+   if (a>32767)
+      return 32767;
+   else if (a<-32768)
+      return -32768;
+   else return a;
+}
+
+static OPUS_INLINE int MULT16_16_Q11_32(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 11;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_Q13(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 13;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_Q14(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 14;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=3;
+   return res;
+}
+
+#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res >>= 15;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=1;
+   return res;
+}
+
+static OPUS_INLINE short MULT16_16_P13(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 4096;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 13;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=4;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_P14(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 8192;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 14;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=4;
+   return res;
+}
+static OPUS_INLINE short MULT16_16_P15(int a, int b)
+{
+   opus_int64 res;
+   if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = ((opus_int64)a)*b;
+   res += 16384;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res >>= 15;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=2;
+   return res;
+}
+
+#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__)
+
+static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (b==0)
+   {
+      fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+      return 0;
+   }
+   if (!VERIFY_INT(a) || !VERIFY_SHORT(b))
+   {
+      fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a/b;
+   if (!VERIFY_SHORT(res))
+   {
+      fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line);
+      if (res>32767)
+         res = 32767;
+      if (res<-32768)
+         res = -32768;
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=35;
+   return res;
+}
+
+#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__)
+static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
+{
+   opus_int64 res;
+   if (b==0)
+   {
+      fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+      return 0;
+   }
+
+   if (!VERIFY_INT(a) || !VERIFY_INT(b))
+   {
+      fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   res = a/b;
+   if (!VERIFY_INT(res))
+   {
+      fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+      celt_assert(0);
+#endif
+   }
+   celt_mips+=70;
+   return res;
+}
+
+#undef PRINT_MIPS
+#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0);
+
+#endif
diff --git a/drivers/opus/celt/fixed_generic.h b/drivers/opus/celt/fixed_generic.h
new file mode 100644
index 0000000000..ecf018a244
--- /dev/null
+++ b/drivers/opus/celt/fixed_generic.h
@@ -0,0 +1,134 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO */
+/**
+   @file fixed_generic.h
+   @brief Generic fixed-point operations
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_GENERIC_H
+#define FIXED_GENERIC_H
+
+/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */
+#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
+#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
+/** Compile-time conversion of float constant to 16-bit value */
+#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Compile-time conversion of float constant to 32-bit value */
+#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
+
+/** Negate a 16-bit value */
+#define NEG16(x) (-(x))
+/** Negate a 32-bit value */
+#define NEG32(x) (-(x))
+
+/** Change a 32-bit value into a 16-bit value. The value is assumed to fit in 16-bit, otherwise the result is undefined */
+#define EXTRACT16(x) ((opus_val16)(x))
+/** Change a 16-bit value into a 32-bit value */
+#define EXTEND32(x) ((opus_val32)(x))
+
+/** Arithmetic shift-right of a 16-bit value */
+#define SHR16(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 16-bit value */
+#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift)))
+/** Arithmetic shift-right of a 32-bit value */
+#define SHR32(a,shift) ((a) >> (shift))
+/** Arithmetic shift-left of a 32-bit value */
+#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift)))
+
+/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */
+#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+/** 32-bit arithmetic shift right where the argument can be negative */
+#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
+
+/** "RAW" macros, should not be used outside of this header file */
+#define SHR(a,shift) ((a) >> (shift))
+#define SHL(a,shift) SHL32(a,shift)
+#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
+#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+
+#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
+
+/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
+#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
+/** Divide by two */
+#define HALF16(x)  (SHR16(x,1))
+#define HALF32(x)  (SHR32(x,1))
+
+/** Add two 16-bit values */
+#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b)))
+/** Subtract two 16-bit values */
+#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b))
+/** Add two 32-bit values */
+#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b))
+/** Subtract two 32-bit values */
+#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
+
+/** 16x16 multiplication where the result fits in 16 bits */
+#define MULT16_16_16(a,b)     ((((opus_val16)(a))*((opus_val16)(b))))
+
+/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */
+/** 16x16 multiplication where the result fits in 32 bits */
+#define MULT16_16(a,b)     (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b)))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+
+#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
+#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
+#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
+
+#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
+#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
+#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
+
+/** Divide a 32-bit value by a 16-bit value. Result fits in 16 bits */
+#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b))))
+
+/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
+#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
+
+#endif
diff --git a/drivers/opus/celt/float_cast.h b/drivers/opus/celt/float_cast.h
new file mode 100644
index 0000000000..ede6574860
--- /dev/null
+++ b/drivers/opus/celt/float_cast.h
@@ -0,0 +1,140 @@
+/* Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Version 1.1 */
+
+#ifndef FLOAT_CAST_H
+#define FLOAT_CAST_H
+
+
+#include "arch.h"
+
+/*============================================================================
+**      On Intel Pentium processors (especially PIII and probably P4), converting
+**      from float to int is very slow. To meet the C specs, the code produced by
+**      most C compilers targeting Pentium needs to change the FPU rounding mode
+**      before the float to int conversion is performed.
+**
+**      Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
+**      is this flushing of the pipeline which is so slow.
+**
+**      Fortunately the ISO C99 specifications define the functions lrint, lrintf,
+**      llrint and llrintf which fix this problem as a side effect.
+**
+**      On Unix-like systems, the configure process should have detected the
+**      presence of these functions. If they weren't found we have to replace them
+**      here with a standard C cast.
+*/
+
+/*
+**      The C99 prototypes for lrint and lrintf are as follows:
+**
+**              long int lrintf (float x) ;
+**              long int lrint  (double x) ;
+*/
+
+/*      The presence of the required functions are detected during the configure
+**      process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
+**      the config.h file.
+*/
+
+#if (HAVE_LRINTF)
+
+/*      These defines enable functionality introduced with the 1999 ISO C
+**      standard. They must be defined before the inclusion of math.h to
+**      engage them. If optimisation is enabled, these functions will be
+**      inlined. With optimisation switched off, you have to link in the
+**      maths library using -lm.
+*/
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrintf(x)
+
+#elif (defined(HAVE_LRINT))
+
+#define _ISOC9X_SOURCE 1
+#define _ISOC99_SOURCE 1
+
+#define __USE_ISOC9X 1
+#define __USE_ISOC99 1
+
+#include <math.h>
+#define float2int(x) lrint(x)
+
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64))
+        #include <xmmintrin.h>
+
+        __inline long int float2int(float value)
+        {
+                return _mm_cvtss_si32(_mm_load_ss(&value));
+        }
+#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32))
+        #include <math.h>
+
+        /*      Win32 doesn't seem to have these functions.
+        **      Therefore implement OPUS_INLINE versions of these functions here.
+        */
+
+        __inline long int
+        float2int (float flt)
+        {       int intgr;
+
+                _asm
+                {       fld flt
+                        fistp intgr
+                } ;
+
+                return intgr ;
+        }
+
+#else
+
+#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L)
+        /* supported by gcc in C99 mode, but not by all other compilers */
+        #warning "Don't have the functions lrint() and lrintf ()."
+        #warning "Replacing these functions with a standard C cast."
+#endif /* __STDC_VERSION__ >= 199901L */
+        #include <math.h>
+        #define float2int(flt) ((int)(floor(.5+flt)))
+#endif
+
+#ifndef DISABLE_FLOAT_API
+static OPUS_INLINE opus_int16 FLOAT2INT16(float x)
+{
+   x = x*CELT_SIG_SCALE;
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return (opus_int16)float2int(x);
+}
+#endif /* DISABLE_FLOAT_API */
+
+#endif /* FLOAT_CAST_H */
diff --git a/drivers/opus/celt/kiss_fft.c b/drivers/opus/celt/kiss_fft.c
new file mode 100644
index 0000000000..333be975d1
--- /dev/null
+++ b/drivers/opus/celt/kiss_fft.c
@@ -0,0 +1,719 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+/* This code is originally from Mark Borgerding's KISS-FFT but has been
+   heavily modified to better suit Opus */
+
+#ifndef SKIP_CONFIG_H
+#  ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#  endif
+#endif
+
+#include "_kiss_fft_guts.h"
+#include "arch.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+/* The guts header contains all the multiplication and addition macros that are defined for
+   complex numbers.  It also delares the kf_ internal functions.
+*/
+
+static void kf_bfly2(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx * Fout2;
+   const kiss_twiddle_cpx * tw1;
+   int i,j;
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout2 = Fout + m;
+      tw1 = st->twiddles;
+      for(j=0;j<m;j++)
+      {
+         kiss_fft_cpx t;
+         Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
+         Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
+         C_MUL (t,  *Fout2 , *tw1);
+         tw1 += fstride;
+         C_SUB( *Fout2 ,  *Fout , t );
+         C_ADDTO( *Fout ,  t );
+         ++Fout2;
+         ++Fout;
+      }
+   }
+}
+
+static void ki_bfly2(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx * Fout2;
+   const kiss_twiddle_cpx * tw1;
+   kiss_fft_cpx t;
+   int i,j;
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout2 = Fout + m;
+      tw1 = st->twiddles;
+      for(j=0;j<m;j++)
+      {
+         C_MULC (t,  *Fout2 , *tw1);
+         tw1 += fstride;
+         C_SUB( *Fout2 ,  *Fout , t );
+         C_ADDTO( *Fout ,  t );
+         ++Fout2;
+         ++Fout;
+      }
+   }
+}
+
+static void kf_bfly4(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+   kiss_fft_cpx scratch[6];
+   const size_t m2=2*m;
+   const size_t m3=3*m;
+   int i, j;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw3 = tw2 = tw1 = st->twiddles;
+      for (j=0;j<m;j++)
+      {
+         C_MUL4(scratch[0],Fout[m] , *tw1 );
+         C_MUL4(scratch[1],Fout[m2] , *tw2 );
+         C_MUL4(scratch[2],Fout[m3] , *tw3 );
+
+         Fout->r = PSHR32(Fout->r, 2);
+         Fout->i = PSHR32(Fout->i, 2);
+         C_SUB( scratch[5] , *Fout, scratch[1] );
+         C_ADDTO(*Fout, scratch[1]);
+         C_ADD( scratch[3] , scratch[0] , scratch[2] );
+         C_SUB( scratch[4] , scratch[0] , scratch[2] );
+         C_SUB( Fout[m2], *Fout, scratch[3] );
+         tw1 += fstride;
+         tw2 += fstride*2;
+         tw3 += fstride*3;
+         C_ADDTO( *Fout , scratch[3] );
+
+         Fout[m].r = scratch[5].r + scratch[4].i;
+         Fout[m].i = scratch[5].i - scratch[4].r;
+         Fout[m3].r = scratch[5].r - scratch[4].i;
+         Fout[m3].i = scratch[5].i + scratch[4].r;
+         ++Fout;
+      }
+   }
+}
+
+static void ki_bfly4(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+   kiss_fft_cpx scratch[6];
+   const size_t m2=2*m;
+   const size_t m3=3*m;
+   int i, j;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw3 = tw2 = tw1 = st->twiddles;
+      for (j=0;j<m;j++)
+      {
+         C_MULC(scratch[0],Fout[m] , *tw1 );
+         C_MULC(scratch[1],Fout[m2] , *tw2 );
+         C_MULC(scratch[2],Fout[m3] , *tw3 );
+
+         C_SUB( scratch[5] , *Fout, scratch[1] );
+         C_ADDTO(*Fout, scratch[1]);
+         C_ADD( scratch[3] , scratch[0] , scratch[2] );
+         C_SUB( scratch[4] , scratch[0] , scratch[2] );
+         C_SUB( Fout[m2], *Fout, scratch[3] );
+         tw1 += fstride;
+         tw2 += fstride*2;
+         tw3 += fstride*3;
+         C_ADDTO( *Fout , scratch[3] );
+
+         Fout[m].r = scratch[5].r - scratch[4].i;
+         Fout[m].i = scratch[5].i + scratch[4].r;
+         Fout[m3].r = scratch[5].r + scratch[4].i;
+         Fout[m3].i = scratch[5].i - scratch[4].r;
+         ++Fout;
+      }
+   }
+}
+
+#ifndef RADIX_TWO_ONLY
+
+static void kf_bfly3(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i;
+   size_t k;
+   const size_t m2 = 2*m;
+   const kiss_twiddle_cpx *tw1,*tw2;
+   kiss_fft_cpx scratch[5];
+   kiss_twiddle_cpx epi3;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   epi3 = st->twiddles[fstride*m];
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw1=tw2=st->twiddles;
+      k=m;
+      do {
+         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+         C_MUL(scratch[1],Fout[m] , *tw1);
+         C_MUL(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+         C_MULBYSCALAR( scratch[0] , epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = Fout[m].r + scratch[0].i;
+         Fout[m2].i = Fout[m].i - scratch[0].r;
+
+         Fout[m].r -= scratch[0].i;
+         Fout[m].i += scratch[0].r;
+
+         ++Fout;
+      } while(--k);
+   }
+}
+
+static void ki_bfly3(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   int i, k;
+   const size_t m2 = 2*m;
+   const kiss_twiddle_cpx *tw1,*tw2;
+   kiss_fft_cpx scratch[5];
+   kiss_twiddle_cpx epi3;
+
+   kiss_fft_cpx * Fout_beg = Fout;
+   epi3 = st->twiddles[fstride*m];
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      tw1=tw2=st->twiddles;
+      k=m;
+      do{
+
+         C_MULC(scratch[1],Fout[m] , *tw1);
+         C_MULC(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+         C_MULBYSCALAR( scratch[0] , -epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = Fout[m].r + scratch[0].i;
+         Fout[m2].i = Fout[m].i - scratch[0].r;
+
+         Fout[m].r -= scratch[0].i;
+         Fout[m].i += scratch[0].r;
+
+         ++Fout;
+      }while(--k);
+   }
+}
+
+static void kf_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+   const kiss_twiddle_cpx * twiddles = st->twiddles;
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+   ya = twiddles[fstride*m];
+   yb = twiddles[fstride*2*m];
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      for ( u=0; u<m; ++u ) {
+         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+         scratch[0] = *Fout0;
+
+         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r += scratch[7].r + scratch[8].r;
+         Fout0->i += scratch[7].i + scratch[8].i;
+
+         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+
+static void ki_bfly5(
+                     kiss_fft_cpx * Fout,
+                     const size_t fstride,
+                     const kiss_fft_state *st,
+                     int m,
+                     int N,
+                     int mm
+                    )
+{
+   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+   int i, u;
+   kiss_fft_cpx scratch[13];
+   const kiss_twiddle_cpx * twiddles = st->twiddles;
+   const kiss_twiddle_cpx *tw;
+   kiss_twiddle_cpx ya,yb;
+   kiss_fft_cpx * Fout_beg = Fout;
+
+   ya = twiddles[fstride*m];
+   yb = twiddles[fstride*2*m];
+   tw=st->twiddles;
+
+   for (i=0;i<N;i++)
+   {
+      Fout = Fout_beg + i*mm;
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      for ( u=0; u<m; ++u ) {
+         scratch[0] = *Fout0;
+
+         C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
+         C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
+         C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
+         C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+         C_ADD( scratch[7],scratch[1],scratch[4]);
+         C_SUB( scratch[10],scratch[1],scratch[4]);
+         C_ADD( scratch[8],scratch[2],scratch[3]);
+         C_SUB( scratch[9],scratch[2],scratch[3]);
+
+         Fout0->r += scratch[7].r + scratch[8].r;
+         Fout0->i += scratch[7].i + scratch[8].i;
+
+         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+         scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
+         scratch[6].i =  S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
+
+         C_SUB(*Fout1,scratch[5],scratch[6]);
+         C_ADD(*Fout4,scratch[5],scratch[6]);
+
+         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+         scratch[12].r =  S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
+         scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
+
+         C_ADD(*Fout2,scratch[11],scratch[12]);
+         C_SUB(*Fout3,scratch[11],scratch[12]);
+
+         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+   }
+}
+
+#endif
+
+
+#ifdef CUSTOM_MODES
+
+static
+void compute_bitrev_table(
+         int Fout,
+         opus_int16 *f,
+         const size_t fstride,
+         int in_stride,
+         opus_int16 * factors,
+         const kiss_fft_state *st
+            )
+{
+   const int p=*factors++; /* the radix  */
+   const int m=*factors++; /* stage's fft length/p */
+
+    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
+   if (m==1)
+   {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         *f = Fout+j;
+         f += fstride*in_stride;
+      }
+   } else {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
+         f += fstride*in_stride;
+         Fout += m;
+      }
+   }
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+static
+int kf_factor(int n,opus_int16 * facbuf)
+{
+    int p=4;
+
+    /*factor out powers of 4, powers of 2, then any remaining primes */
+    do {
+        while (n % p) {
+            switch (p) {
+                case 4: p = 2; break;
+                case 2: p = 3; break;
+                default: p += 2; break;
+            }
+            if (p>32000 || (opus_int32)p*(opus_int32)p > n)
+                p = n;          /* no more factors, skip to end */
+        }
+        n /= p;
+#ifdef RADIX_TWO_ONLY
+        if (p!=2 && p != 4)
+#else
+        if (p>5)
+#endif
+        {
+           return 0;
+        }
+        *facbuf++ = p;
+        *facbuf++ = n;
+    } while (n > 1);
+    return 1;
+}
+
+static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
+{
+   int i;
+#ifdef OPUS_FIXED_POINT
+   for (i=0;i<nfft;++i) {
+      opus_val32 phase = -i;
+      kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
+   }
+#else
+   for (i=0;i<nfft;++i) {
+      const double pi=3.14159265358979323846264338327;
+      double phase = ( -2*pi /nfft ) * i;
+      kf_cexp(twiddles+i, phase );
+   }
+#endif
+}
+
+/*
+ *
+ * Allocates all necessary storage space for the fft and ifft.
+ * The return value is a contiguous block of memory.  As such,
+ * It can be freed with free().
+ * */
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  const kiss_fft_state *base)
+{
+    kiss_fft_state *st=NULL;
+    size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
+
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_state*)mem;
+        *lenmem = memneeded;
+    }
+    if (st) {
+        opus_int16 *bitrev;
+        kiss_twiddle_cpx *twiddles;
+
+        st->nfft=nfft;
+#ifndef OPUS_FIXED_POINT
+        st->scale = 1.f/nfft;
+#endif
+        if (base != NULL)
+        {
+           st->twiddles = base->twiddles;
+           st->shift = 0;
+           while (nfft<<st->shift != base->nfft && st->shift < 32)
+              st->shift++;
+           if (st->shift>=32)
+              goto fail;
+        } else {
+           st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
+           compute_twiddles(twiddles, nfft);
+           st->shift = -1;
+        }
+        if (!kf_factor(nfft,st->factors))
+        {
+           goto fail;
+        }
+
+        /* bitrev */
+        st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
+        if (st->bitrev==NULL)
+            goto fail;
+        compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
+    }
+    return st;
+fail:
+    opus_fft_free(st);
+    return NULL;
+}
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
+{
+   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
+}
+
+void opus_fft_free(const kiss_fft_state *cfg)
+{
+   if (cfg)
+   {
+      opus_free((opus_int16*)cfg->bitrev);
+      if (cfg->shift < 0)
+         opus_free((kiss_twiddle_cpx*)cfg->twiddles);
+      opus_free((kiss_fft_state*)cfg);
+   }
+}
+
+#endif /* CUSTOM_MODES */
+
+void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+    int m2, m;
+    int p;
+    int L;
+    int fstride[MAXFACTORS];
+    int i;
+    int shift;
+
+    /* st->shift can be -1 */
+    shift = st->shift>0 ? st->shift : 0;
+
+    celt_assert2 (fin != fout, "In-place FFT not supported");
+    /* Bit-reverse the input */
+    for (i=0;i<st->nfft;i++)
+    {
+       fout[st->bitrev[i]] = fin[i];
+#ifndef OPUS_FIXED_POINT
+       fout[st->bitrev[i]].r *= st->scale;
+       fout[st->bitrev[i]].i *= st->scale;
+#endif
+    }
+
+    fstride[0] = 1;
+    L=0;
+    do {
+       p = st->factors[2*L];
+       m = st->factors[2*L+1];
+       fstride[L+1] = fstride[L]*p;
+       L++;
+    } while(m!=1);
+    m = st->factors[2*L-1];
+    for (i=L-1;i>=0;i--)
+    {
+       if (i!=0)
+          m2 = st->factors[2*i-1];
+       else
+          m2 = 1;
+       switch (st->factors[2*i])
+       {
+       case 2:
+          kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+       case 4:
+          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #ifndef RADIX_TWO_ONLY
+       case 3:
+          kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+       case 5:
+          kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          break;
+ #endif
+       }
+       m = m2;
+    }
+}
+
+void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int m2, m;
+   int p;
+   int L;
+   int fstride[MAXFACTORS];
+   int i;
+   int shift;
+
+   /* st->shift can be -1 */
+   shift = st->shift>0 ? st->shift : 0;
+   celt_assert2 (fin != fout, "In-place FFT not supported");
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+      fout[st->bitrev[i]] = fin[i];
+
+   fstride[0] = 1;
+   L=0;
+   do {
+      p = st->factors[2*L];
+      m = st->factors[2*L+1];
+      fstride[L+1] = fstride[L]*p;
+      L++;
+   } while(m!=1);
+   m = st->factors[2*L-1];
+   for (i=L-1;i>=0;i--)
+   {
+      if (i!=0)
+         m2 = st->factors[2*i-1];
+      else
+         m2 = 1;
+      switch (st->factors[2*i])
+      {
+      case 2:
+         ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+      case 4:
+         ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+#ifndef RADIX_TWO_ONLY
+      case 3:
+         ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+      case 5:
+         ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+         break;
+#endif
+      }
+      m = m2;
+   }
+}
+
diff --git a/drivers/opus/celt/kiss_fft.h b/drivers/opus/celt/kiss_fft.h
new file mode 100644
index 0000000000..aa22b3a419
--- /dev/null
+++ b/drivers/opus/celt/kiss_fft.h
@@ -0,0 +1,139 @@
+/*Copyright (c) 2003-2004, Mark Borgerding
+  Lots of modifications by Jean-Marc Valin
+  Copyright (c) 2005-2007, Xiph.Org Foundation
+  Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <math.h>
+#include "arch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
+#else
+#define KISS_FFT_MALLOC opus_alloc
+#endif
+
+#ifdef OPUS_FIXED_POINT
+#include "arch.h"
+
+#  define kiss_fft_scalar opus_int32
+#  define kiss_twiddle_scalar opus_int16
+
+
+#else
+# ifndef kiss_fft_scalar
+/*  default is float */
+#   define kiss_fft_scalar float
+#   define kiss_twiddle_scalar float
+#   define KF_SUFFIX _celt_single
+# endif
+#endif
+
+typedef struct {
+    kiss_fft_scalar r;
+    kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct {
+   kiss_twiddle_scalar r;
+   kiss_twiddle_scalar i;
+}kiss_twiddle_cpx;
+
+#define MAXFACTORS 8
+/* e.g. an fft of length 128 has 4 factors
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+typedef struct kiss_fft_state{
+    int nfft;
+#ifndef OPUS_FIXED_POINT
+    kiss_fft_scalar scale;
+#endif
+    int shift;
+    opus_int16 factors[2*MAXFACTORS];
+    const opus_int16 *bitrev;
+    const kiss_twiddle_cpx *twiddles;
+} kiss_fft_state;
+
+/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
+
+/**
+ *  opus_fft_alloc
+ *
+ *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ *  typical usage:      kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL);
+ *
+ *  The return value from fft_alloc is a cfg buffer used internally
+ *  by the fft routine or NULL.
+ *
+ *  If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc.
+ *  The returned value should be free()d when done to avoid memory leaks.
+ *
+ *  The state can be placed in a user supplied buffer 'mem':
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
+
+kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
+
+/**
+ * opus_fft(cfg,in_out_buf)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+    f[k].r and f[k].i
+ * */
+void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+void opus_fft_free(const kiss_fft_state *cfg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/celt/laplace.c b/drivers/opus/celt/laplace.c
new file mode 100644
index 0000000000..c6d293f298
--- /dev/null
+++ b/drivers/opus/celt/laplace.c
@@ -0,0 +1,134 @@
+/* Copyright (c) 2007 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "laplace.h"
+#include "mathops.h"
+
+/* The minimum probability of an energy delta (out of 32768). */
+#define LAPLACE_LOG_MINP (0)
+#define LAPLACE_MINP (1<<LAPLACE_LOG_MINP)
+/* The minimum number of guaranteed representable energy deltas (in one
+    direction). */
+#define LAPLACE_NMIN (16)
+
+/* When called, decay is positive and at most 11456. */
+static unsigned ec_laplace_get_freq1(unsigned fs0, int decay)
+{
+   unsigned ft;
+   ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN) - fs0;
+   return ft*(opus_int32)(16384-decay)>>15;
+}
+
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay)
+{
+   unsigned fl;
+   int val = *value;
+   fl = 0;
+   if (val)
+   {
+      int s;
+      int i;
+      s = -(val<0);
+      val = (val+s)^s;
+      fl = fs;
+      fs = ec_laplace_get_freq1(fs, decay);
+      /* Search the decaying part of the PDF.*/
+      for (i=1; fs > 0 && i < val; i++)
+      {
+         fs *= 2;
+         fl += fs+2*LAPLACE_MINP;
+         fs = (fs*(opus_int32)decay)>>15;
+      }
+      /* Everything beyond that has probability LAPLACE_MINP. */
+      if (!fs)
+      {
+         int di;
+         int ndi_max;
+         ndi_max = (32768-fl+LAPLACE_MINP-1)>>LAPLACE_LOG_MINP;
+         ndi_max = (ndi_max-s)>>1;
+         di = IMIN(val - i, ndi_max - 1);
+         fl += (2*di+1+s)*LAPLACE_MINP;
+         fs = IMIN(LAPLACE_MINP, 32768-fl);
+         *value = (i+di+s)^s;
+      }
+      else
+      {
+         fs += LAPLACE_MINP;
+         fl += fs&~s;
+      }
+      celt_assert(fl+fs<=32768);
+      celt_assert(fs>0);
+   }
+   ec_encode_bin(enc, fl, fl+fs, 15);
+}
+
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay)
+{
+   int val=0;
+   unsigned fl;
+   unsigned fm;
+   fm = ec_decode_bin(dec, 15);
+   fl = 0;
+   if (fm >= fs)
+   {
+      val++;
+      fl = fs;
+      fs = ec_laplace_get_freq1(fs, decay)+LAPLACE_MINP;
+      /* Search the decaying part of the PDF.*/
+      while(fs > LAPLACE_MINP && fm >= fl+2*fs)
+      {
+         fs *= 2;
+         fl += fs;
+         fs = ((fs-2*LAPLACE_MINP)*(opus_int32)decay)>>15;
+         fs += LAPLACE_MINP;
+         val++;
+      }
+      /* Everything beyond that has probability LAPLACE_MINP. */
+      if (fs <= LAPLACE_MINP)
+      {
+         int di;
+         di = (fm-fl)>>(LAPLACE_LOG_MINP+1);
+         val += di;
+         fl += 2*di*LAPLACE_MINP;
+      }
+      if (fm < fl+fs)
+         val = -val;
+      else
+         fl += fs;
+   }
+   celt_assert(fl<32768);
+   celt_assert(fs>0);
+   celt_assert(fl<=fm);
+   celt_assert(fm<IMIN(fl+fs,32768));
+   ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768);
+   return val;
+}
diff --git a/drivers/opus/celt/laplace.h b/drivers/opus/celt/laplace.h
new file mode 100644
index 0000000000..46c14b5da5
--- /dev/null
+++ b/drivers/opus/celt/laplace.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2007 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "entenc.h"
+#include "entdec.h"
+
+/** Encode a value that is assumed to be the realisation of a
+    Laplace-distributed random process
+ @param enc Entropy encoder state
+ @param value Value to encode
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+*/
+void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
+
+/** Decode a value that is assumed to be the realisation of a
+    Laplace-distributed random process
+ @param dec Entropy decoder state
+ @param fs Probability of 0, multiplied by 32768
+ @param decay Probability of the value +/- 1, multiplied by 16384
+ @return Value decoded
+ */
+int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
diff --git a/drivers/opus/celt/mathops.c b/drivers/opus/celt/mathops.c
new file mode 100644
index 0000000000..49be746d8c
--- /dev/null
+++ b/drivers/opus/celt/mathops.c
@@ -0,0 +1,208 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file mathops.h
+   @brief Various math functions
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "mathops.h"
+
+/*Compute floor(sqrt(_val)) with exact arithmetic.
+  This has been tested on all possible 32-bit inputs.*/
+unsigned isqrt32(opus_uint32 _val){
+  unsigned b;
+  unsigned g;
+  int      bshift;
+  /*Uses the second method from
+     http://www.azillionmonkeys.com/qed/sqroot.html
+    The main idea is to search for the largest binary digit b such that
+     (g+b)*(g+b) <= _val, and add it to the solution g.*/
+  g=0;
+  bshift=(EC_ILOG(_val)-1)>>1;
+  b=1U<<bshift;
+  do{
+    opus_uint32 t;
+    t=(((opus_uint32)g<<1)+b)<<bshift;
+    if(t<=_val){
+      g+=b;
+      _val-=t;
+    }
+    b>>=1;
+    bshift--;
+  }
+  while(bshift>=0);
+  return g;
+}
+
+#ifdef OPUS_FIXED_POINT
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b)
+{
+   opus_val16 rcp;
+   opus_val32 result, rem;
+   int shift = celt_ilog2(b)-29;
+   a = VSHR32(a,shift);
+   b = VSHR32(b,shift);
+   /* 16-bit reciprocal */
+   rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
+   result = MULT16_32_Q15(rcp, a);
+   rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
+   result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
+   if (result >= 536870912)       /*  2^29 */
+      return 2147483647;          /*  2^31 - 1 */
+   else if (result <= -536870912) /* -2^29 */
+      return -2147483647;         /* -2^31 */
+   else
+      return SHL32(result, 2);
+}
+
+/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
+opus_val16 celt_rsqrt_norm(opus_val32 x)
+{
+   opus_val16 n;
+   opus_val16 r;
+   opus_val16 r2;
+   opus_val16 y;
+   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
+   n = x-32768;
+   /* Get a rough initial guess for the root.
+      The optimal minimax quadratic approximation (using relative error) is
+       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
+      Coefficients here, and the final result r, are Q14.*/
+   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
+   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
+      We can compute the result from n and r using Q15 multiplies with some
+       adjustment, carefully done to avoid overflow.
+      Range of y is [-1564,1594]. */
+   r2 = MULT16_16_Q15(r, r);
+   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
+   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
+      This yields the Q14 reciprocal square root of the Q16 x, with a maximum
+       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
+       peak absolute error of 2.26591/16384. */
+   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
+              SUB16(MULT16_16_Q15(y, 12288), 16384))));
+}
+
+/** Sqrt approximation (QX input, QX/2 output) */
+opus_val32 celt_sqrt(opus_val32 x)
+{
+   int k;
+   opus_val16 n;
+   opus_val32 rt;
+   static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
+   if (x==0)
+      return 0;
+   else if (x>=1073741824)
+      return 32767;
+   k = (celt_ilog2(x)>>1)-7;
+   x = VSHR32(x, 2*k);
+   n = x-32768;
+   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
+              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
+   rt = VSHR32(rt,7-k);
+   return rt;
+}
+
+#define L1 32767
+#define L2 -7651
+#define L3 8277
+#define L4 -626
+
+static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
+{
+   opus_val16 x2;
+
+   x2 = MULT16_16_P15(x,x);
+   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
+                                                                                ))))))));
+}
+
+#undef L1
+#undef L2
+#undef L3
+#undef L4
+
+opus_val16 celt_cos_norm(opus_val32 x)
+{
+   x = x&0x0001ffff;
+   if (x>SHL32(EXTEND32(1), 16))
+      x = SUB32(SHL32(EXTEND32(1), 17),x);
+   if (x&0x00007fff)
+   {
+      if (x<SHL32(EXTEND32(1), 15))
+      {
+         return _celt_cos_pi_2(EXTRACT16(x));
+      } else {
+         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
+      }
+   } else {
+      if (x&0x0000ffff)
+         return 0;
+      else if (x&0x0001ffff)
+         return -32767;
+      else
+         return 32767;
+   }
+}
+
+/** Reciprocal approximation (Q15 input, Q16 output) */
+opus_val32 celt_rcp(opus_val32 x)
+{
+   int i;
+   opus_val16 n;
+   opus_val16 r;
+   celt_assert2(x>0, "celt_rcp() only defined for positive values");
+   i = celt_ilog2(x);
+   /* n is Q15 with range [0,1). */
+   n = VSHR32(x,i-15)-32768;
+   /* Start with a linear approximation:
+      r = 1.8823529411764706-0.9411764705882353*n.
+      The coefficients and the result are Q14 in the range [15420,30840].*/
+   r = ADD16(30840, MULT16_16_Q15(-15420, n));
+   /* Perform two Newton iterations:
+      r -= r*((r*n)-1.Q15)
+         = r*((r*n)+(r-1.Q15)). */
+   r = SUB16(r, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
+   /* We subtract an extra 1 in the second iteration to avoid overflow; it also
+       neatly compensates for truncation error in the rest of the process. */
+   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
+             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
+   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
+       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
+       error of 1.24665/32768. */
+   return VSHR32(EXTEND32(r),i-16);
+}
+
+#endif
diff --git a/drivers/opus/celt/mathops.h b/drivers/opus/celt/mathops.h
new file mode 100644
index 0000000000..4a6bc539bc
--- /dev/null
+++ b/drivers/opus/celt/mathops.h
@@ -0,0 +1,258 @@
+/* Copyright (c) 2002-2008 Jean-Marc Valin
+   Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file mathops.h
+   @brief Various math functions
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef MATHOPS_H
+#define MATHOPS_H
+
+#include "arch.h"
+#include "entcode.h"
+#include "os_support.h"
+
+/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
+#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
+
+unsigned isqrt32(opus_uint32 _val);
+
+#ifndef OVERRIDE_CELT_MAXABS16
+static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+{
+   int i;
+   opus_val16 maxval = 0;
+   opus_val16 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX16(maxval, x[i]);
+      minval = MIN16(minval, x[i]);
+   }
+   return MAX32(EXTEND32(maxval),-EXTEND32(minval));
+}
+#endif
+
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef OPUS_FIXED_POINT
+static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+   int i;
+   opus_val32 maxval = 0;
+   opus_val32 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX32(maxval, x[i]);
+      minval = MIN32(minval, x[i]);
+   }
+   return MAX32(maxval, -minval);
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
+
+
+#ifndef OPUS_FIXED_POINT
+
+#define PI 3.141592653f
+#define celt_sqrt(x) ((float)sqrt(x))
+#define celt_rsqrt(x) (1.f/celt_sqrt(x))
+#define celt_rsqrt_norm(x) (celt_rsqrt(x))
+#define celt_cos_norm(x) ((float)cos((.5f*PI)*(x)))
+#define celt_rcp(x) (1.f/(x))
+#define celt_div(a,b) ((a)/(b))
+#define frac_div32(a,b) ((float)(a)/(b))
+
+#ifdef FLOAT_APPROX
+
+/* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127
+         denorm, +/- inf and NaN are *not* handled */
+
+/** Base-2 log approximation (log2(x)). */
+static OPUS_INLINE float celt_log2(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      opus_uint32 i;
+   } in;
+   in.f = x;
+   integer = (in.i>>23)-127;
+   in.i -= integer<<23;
+   frac = in.f - 1.5f;
+   frac = -0.41445418f + frac*(0.95909232f
+          + frac*(-0.33951290f + frac*0.16541097f));
+   return 1+integer+frac;
+}
+
+/** Base-2 exponential approximation (2^x). */
+static OPUS_INLINE float celt_exp2(float x)
+{
+   int integer;
+   float frac;
+   union {
+      float f;
+      opus_uint32 i;
+   } res;
+   integer = floor(x);
+   if (integer < -50)
+      return 0;
+   frac = x-integer;
+   /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
+   res.f = 0.99992522f + frac * (0.69583354f
+           + frac * (0.22606716f + 0.078024523f*frac));
+   res.i = (res.i + (integer<<23)) & 0x7fffffff;
+   return res.f;
+}
+
+#else
+#define celt_log2(x) ((float)(1.442695040888963387*log(x)))
+#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))
+#endif
+
+#endif
+
+#ifdef OPUS_FIXED_POINT
+
+#include "os_support.h"
+
+#ifndef OVERRIDE_CELT_ILOG2
+/** Integer log in base2. Undefined for zero and negative numbers */
+static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
+{
+   celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
+   return EC_ILOG(x)-1;
+}
+#endif
+
+
+/** Integer log in base2. Defined for zero, but not for negative numbers */
+static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
+{
+   return x <= 0 ? 0 : celt_ilog2(x);
+}
+
+opus_val16 celt_rsqrt_norm(opus_val32 x);
+
+opus_val32 celt_sqrt(opus_val32 x);
+
+opus_val16 celt_cos_norm(opus_val32 x);
+
+/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
+static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
+{
+   int i;
+   opus_val16 n, frac;
+   /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
+       0.15530808010959576, -0.08556153059057618 */
+   static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
+   if (x==0)
+      return -32767;
+   i = celt_ilog2(x);
+   n = VSHR32(x,i-15)-32768-16384;
+   frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));
+   return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);
+}
+
+/*
+ K0 = 1
+ K1 = log(2)
+ K2 = 3-4*log(2)
+ K3 = 3*log(2) - 2
+*/
+#define D0 16383
+#define D1 22804
+#define D2 14819
+#define D3 10204
+
+static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
+{
+   opus_val16 frac;
+   frac = SHL16(x, 4);
+   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
+}
+/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
+static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
+{
+   int integer;
+   opus_val16 frac;
+   integer = SHR16(x,10);
+   if (integer>14)
+      return 0x7f000000;
+   else if (integer < -15)
+      return 0;
+   frac = celt_exp2_frac(x-SHL16(integer,10));
+   return VSHR32(EXTEND32(frac), -integer-2);
+}
+
+opus_val32 celt_rcp(opus_val32 x);
+
+#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))
+
+opus_val32 frac_div32(opus_val32 a, opus_val32 b);
+
+#define M1 32767
+#define M2 -21
+#define M3 -11943
+#define M4 4936
+
+/* Atan approximation using a 4th order polynomial. Input is in Q15 format
+   and normalized by pi/4. Output is in Q15 format */
+static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
+{
+   return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
+}
+
+#undef M1
+#undef M2
+#undef M3
+#undef M4
+
+/* atan2() approximation valid for positive input values */
+static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
+{
+   if (y < x)
+   {
+      opus_val32 arg;
+      arg = celt_div(SHL32(EXTEND32(y),15),x);
+      if (arg >= 32767)
+         arg = 32767;
+      return SHR16(celt_atan01(EXTRACT16(arg)),1);
+   } else {
+      opus_val32 arg;
+      arg = celt_div(SHL32(EXTEND32(x),15),y);
+      if (arg >= 32767)
+         arg = 32767;
+      return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1);
+   }
+}
+
+#endif /* OPUS_FIXED_POINT */
+#endif /* MATHOPS_H */
diff --git a/drivers/opus/celt/mdct.c b/drivers/opus/celt/mdct.c
new file mode 100644
index 0000000000..d08d026fac
--- /dev/null
+++ b/drivers/opus/celt/mdct.c
@@ -0,0 +1,311 @@
+	/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+
+#ifndef SKIP_CONFIG_H
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#endif
+
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
+#ifdef CUSTOM_MODES
+
+int clt_mdct_init(celt_mdct_lookup *l,int N, int maxshift)
+{
+   int i;
+   int N4;
+   kiss_twiddle_scalar *trig;
+#if defined(OPUS_FIXED_POINT)
+   int N2=N>>1;
+#endif
+   l->n = N;
+   N4 = N>>2;
+   l->maxshift = maxshift;
+   for (i=0;i<=maxshift;i++)
+   {
+      if (i==0)
+         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
+      else
+         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
+#ifndef ENABLE_TI_DSPLIB55
+      if (l->kfft[i]==NULL)
+         return 0;
+#endif
+   }
+   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
+   if (l->trig==NULL)
+     return 0;
+   /* We have enough points that sine isn't necessary */
+#if defined(OPUS_FIXED_POINT)
+   for (i=0;i<=N4;i++)
+      trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
+#else
+   for (i=0;i<=N4;i++)
+      trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
+#endif
+   return 1;
+}
+
+void clt_mdct_clear(celt_mdct_lookup *l)
+{
+   int i;
+   for (i=0;i<=l->maxshift;i++)
+      opus_fft_free(l->kfft[i]);
+   opus_free((kiss_twiddle_scalar*)l->trig);
+}
+
+#endif /* CUSTOM_MODES */
+
+/* Forward MDCT trashes the input array */
+void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride)
+{
+   int i;
+   int N, N2, N4;
+   kiss_twiddle_scalar sine;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_scalar, f2);
+   SAVE_STACK;
+   N = l->n;
+   N >>= shift;
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N2, kiss_fft_scalar);
+   /* sin(x) ~= x here */
+#ifdef OPUS_FIXED_POINT
+   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         re = yp[0];
+         im = yp[1];
+         yr = -S_MUL(re,t[i<<shift])  -  S_MUL(im,t[(N4-i)<<shift]);
+         yi = -S_MUL(im,t[i<<shift])  +  S_MUL(re,t[(N4-i)<<shift]);
+         /* works because the cos is nearly one */
+         *yp++ = yr + S_MUL(yi,sine);
+         *yp++ = yi - S_MUL(yr,sine);
+      }
+   }
+
+   /* N/4 complex FFT, down-scales by 4/N */
+   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
+
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
+         yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
+         /* works because the cos is nearly one */
+         *yp1 = yr - S_MUL(yi,sine);
+         *yp2 = yi + S_MUL(yr,sine);;
+         fp += 2;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+
+void clt_mdct_backward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
+{
+   int i;
+   int N, N2, N4;
+   kiss_twiddle_scalar sine;
+   VARDECL(kiss_fft_scalar, f2);
+   SAVE_STACK;
+   N = l->n;
+   N >>= shift;
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f2, N2, kiss_fft_scalar);
+   /* sin(x) ~= x here */
+#ifdef OPUS_FIXED_POINT
+   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
+         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
+         /* works because the cos is nearly one */
+         *yp++ = yr - S_MUL(yi,sine);
+         *yp++ = yi + S_MUL(yr,sine);
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+
+   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
+   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
+
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i<<shift];
+         t1 = t[(N4-i)<<shift];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
+         re = yp1[0];
+         im = yp1[1];
+         /* works because the cos is nearly one */
+         yp0[0] = -(yr - S_MUL(yi,sine));
+         yp1[1] = yi + S_MUL(yr,sine);
+
+         t0 = t[(N4-i-1)<<shift];
+         t1 = t[(i+1)<<shift];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
+         /* works because the cos is nearly one */
+         yp1[0] = -(yr - S_MUL(yi,sine));
+         yp0[1] = yi + S_MUL(yr,sine);
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}
diff --git a/drivers/opus/celt/mdct.h b/drivers/opus/celt/mdct.h
new file mode 100644
index 0000000000..4e7a199246
--- /dev/null
+++ b/drivers/opus/celt/mdct.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+
+#ifndef MDCT_H
+#define MDCT_H
+
+#include "opus_defines.h"
+#include "kiss_fft.h"
+#include "arch.h"
+
+typedef struct {
+   int n;
+   int maxshift;
+   const kiss_fft_state *kfft[4];
+   const kiss_twiddle_scalar * OPUS_RESTRICT trig;
+} celt_mdct_lookup;
+
+int clt_mdct_init(celt_mdct_lookup *l,int N, int maxshift);
+void clt_mdct_clear(celt_mdct_lookup *l);
+
+/** Compute a forward MDCT and scale by 4/N, trashes the input array */
+void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride);
+
+/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
+    (scales implicitly by 1/2) */
+void clt_mdct_backward(const celt_mdct_lookup *l, kiss_fft_scalar *in,
+      kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
+
+#endif
diff --git a/drivers/opus/celt/mfrngcod.h b/drivers/opus/celt/mfrngcod.h
new file mode 100644
index 0000000000..809152a59a
--- /dev/null
+++ b/drivers/opus/celt/mfrngcod.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2001-2008 Timothy B. Terriberry
+   Copyright (c) 2008-2009 Xiph.Org Foundation */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(_mfrngcode_H)
+# define _mfrngcode_H (1)
+# include "entcode.h"
+
+/*Constants used by the entropy encoder/decoder.*/
+
+/*The number of bits to output at a time.*/
+# define EC_SYM_BITS   (8)
+/*The total number of bits in each of the state registers.*/
+# define EC_CODE_BITS  (32)
+/*The maximum symbol value.*/
+# define EC_SYM_MAX    ((1U<<EC_SYM_BITS)-1)
+/*Bits to shift by to move a symbol into the high-order position.*/
+# define EC_CODE_SHIFT (EC_CODE_BITS-EC_SYM_BITS-1)
+/*Carry bit of the high-order range symbol.*/
+# define EC_CODE_TOP   (((opus_uint32)1U)<<(EC_CODE_BITS-1))
+/*Low-order bit of the high-order range symbol.*/
+# define EC_CODE_BOT   (EC_CODE_TOP>>EC_SYM_BITS)
+/*The number of bits available for the last, partial symbol in the code field.*/
+# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1)
+#endif
diff --git a/drivers/opus/celt/modes.c b/drivers/opus/celt/modes.c
new file mode 100644
index 0000000000..3794074aaa
--- /dev/null
+++ b/drivers/opus/celt/modes.c
@@ -0,0 +1,438 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "celt.h"
+#include "opus_modes.h"
+#include "rate.h"
+#include "os_support.h"
+#include "stack_alloc.h"
+#include "quant_bands.h"
+
+static const opus_int16 eband5ms[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
+  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
+};
+
+/* Alternate tuning (partially derived from Vorbis) */
+#define BITALLOC_SIZE 11
+/* Bit allocation table in units of 1/32 bit/sample (0.1875 dB SNR) */
+static const unsigned char band_allocation[] = {
+/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ 90, 80, 75, 69, 63, 56, 49, 40, 34, 29, 20, 18, 10,  0,  0,  0,  0,  0,  0,  0,  0,
+110,100, 90, 84, 78, 71, 65, 58, 51, 45, 39, 32, 26, 20, 12,  0,  0,  0,  0,  0,  0,
+118,110,103, 93, 86, 80, 75, 70, 65, 59, 53, 47, 40, 31, 23, 15,  4,  0,  0,  0,  0,
+126,119,112,104, 95, 89, 83, 78, 72, 66, 60, 54, 47, 39, 32, 25, 17, 12,  1,  0,  0,
+134,127,120,114,103, 97, 91, 85, 78, 72, 66, 60, 54, 47, 41, 35, 29, 23, 16, 10,  1,
+144,137,130,124,113,107,101, 95, 88, 82, 76, 70, 64, 57, 51, 45, 39, 33, 26, 15,  1,
+152,145,138,132,123,117,111,105, 98, 92, 86, 80, 74, 67, 61, 55, 49, 43, 36, 20,  1,
+162,155,148,142,133,127,121,115,108,102, 96, 90, 84, 77, 71, 65, 59, 53, 46, 30,  1,
+172,165,158,152,143,137,131,125,118,112,106,100, 94, 87, 81, 75, 69, 63, 56, 45, 20,
+200,200,200,200,200,200,200,200,198,193,188,183,178,173,168,163,158,153,148,129,104,
+};
+
+#ifndef CUSTOM_MODES_ONLY
+ #ifdef OPUS_FIXED_POINT
+  #include "static_modes_fixed.h"
+ #else
+  #include "static_modes_float.h"
+ #endif
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+#ifdef CUSTOM_MODES
+
+/* Defining 25 critical bands for the full 0-20 kHz audio bandwidth
+   Taken from http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html */
+#define BARK_BANDS 25
+static const opus_int16 bark_freq[BARK_BANDS+1] = {
+      0,   100,   200,   300,   400,
+    510,   630,   770,   920,  1080,
+   1270,  1480,  1720,  2000,  2320,
+   2700,  3150,  3700,  4400,  5300,
+   6400,  7700,  9500, 12000, 15500,
+  20000};
+
+static opus_int16 *compute_ebands(opus_int32 Fs, int frame_size, int res, int *nbEBands)
+{
+   opus_int16 *eBands;
+   int i, j, lin, low, high, nBark, offset=0;
+
+   /* All modes that have 2.5 ms short blocks use the same definition */
+   if (Fs == 400*(opus_int32)frame_size)
+   {
+      *nbEBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+      eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+1));
+      for (i=0;i<*nbEBands+1;i++)
+         eBands[i] = eband5ms[i];
+      return eBands;
+   }
+   /* Find the number of critical bands supported by our sampling rate */
+   for (nBark=1;nBark<BARK_BANDS;nBark++)
+    if (bark_freq[nBark+1]*2 >= Fs)
+       break;
+
+   /* Find where the linear part ends (i.e. where the spacing is more than min_width */
+   for (lin=0;lin<nBark;lin++)
+      if (bark_freq[lin+1]-bark_freq[lin] >= res)
+         break;
+
+   low = (bark_freq[lin]+res/2)/res;
+   high = nBark-lin;
+   *nbEBands = low+high;
+   eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+2));
+
+   if (eBands==NULL)
+      return NULL;
+
+   /* Linear spacing (min_width) */
+   for (i=0;i<low;i++)
+      eBands[i] = i;
+   if (low>0)
+      offset = eBands[low-1]*res - bark_freq[lin-1];
+   /* Spacing follows critical bands */
+   for (i=0;i<high;i++)
+   {
+      int target = bark_freq[lin+i];
+      /* Round to an even value */
+      eBands[i+low] = (target+offset/2+res)/(2*res)*2;
+      offset = eBands[i+low]*res - target;
+   }
+   /* Enforce the minimum spacing at the boundary */
+   for (i=0;i<*nbEBands;i++)
+      if (eBands[i] < i)
+         eBands[i] = i;
+   /* Round to an even value */
+   eBands[*nbEBands] = (bark_freq[nBark]+res)/(2*res)*2;
+   if (eBands[*nbEBands] > frame_size)
+      eBands[*nbEBands] = frame_size;
+   for (i=1;i<*nbEBands-1;i++)
+   {
+      if (eBands[i+1]-eBands[i] < eBands[i]-eBands[i-1])
+      {
+         eBands[i] -= (2*eBands[i]-eBands[i-1]-eBands[i+1])/2;
+      }
+   }
+   /* Remove any empty bands. */
+   for (i=j=0;i<*nbEBands;i++)
+      if(eBands[i+1]>eBands[j])
+         eBands[++j]=eBands[i+1];
+   *nbEBands=j;
+
+   for (i=1;i<*nbEBands;i++)
+   {
+      /* Every band must be smaller than the last band. */
+      celt_assert(eBands[i]-eBands[i-1]<=eBands[*nbEBands]-eBands[*nbEBands-1]);
+      /* Each band must be no larger than twice the size of the previous one. */
+      celt_assert(eBands[i+1]-eBands[i]<=2*(eBands[i]-eBands[i-1]));
+   }
+
+   return eBands;
+}
+
+static void compute_allocation_table(CELTMode *mode)
+{
+   int i, j;
+   unsigned char *allocVectors;
+   int maxBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1;
+
+   mode->nbAllocVectors = BITALLOC_SIZE;
+   allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands));
+   if (allocVectors==NULL)
+      return;
+
+   /* Check for standard mode */
+   if (mode->Fs == 400*(opus_int32)mode->shortMdctSize)
+   {
+      for (i=0;i<BITALLOC_SIZE*mode->nbEBands;i++)
+         allocVectors[i] = band_allocation[i];
+      mode->allocVectors = allocVectors;
+      return;
+   }
+   /* If not the standard mode, interpolate */
+   /* Compute per-codec-band allocation from per-critical-band matrix */
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+      {
+         int k;
+         for (k=0;k<maxBands;k++)
+         {
+            if (400*(opus_int32)eband5ms[k] > mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize)
+               break;
+         }
+         if (k>maxBands-1)
+            allocVectors[i*mode->nbEBands+j] = band_allocation[i*maxBands + maxBands-1];
+         else {
+            opus_int32 a0, a1;
+            a1 = mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize - 400*(opus_int32)eband5ms[k-1];
+            a0 = 400*(opus_int32)eband5ms[k] - mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize;
+            allocVectors[i*mode->nbEBands+j] = (a0*band_allocation[i*maxBands+k-1]
+                                             + a1*band_allocation[i*maxBands+k])/(a0+a1);
+         }
+      }
+   }
+
+   /*printf ("\n");
+   for (i=0;i<BITALLOC_SIZE;i++)
+   {
+      for (j=0;j<mode->nbEBands;j++)
+         printf ("%d ", allocVectors[i*mode->nbEBands+j]);
+      printf ("\n");
+   }
+   exit(0);*/
+
+   mode->allocVectors = allocVectors;
+}
+
+#endif /* CUSTOM_MODES */
+
+CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
+{
+   int i;
+#ifdef CUSTOM_MODES
+   CELTMode *mode=NULL;
+   int res;
+   opus_val16 *window;
+   opus_int16 *logN;
+   int LM;
+   ALLOC_STACK;
+#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
+   if (global_stack==NULL)
+      goto failure;
+#endif
+#endif
+
+#ifndef CUSTOM_MODES_ONLY
+   for (i=0;i<TOTAL_MODES;i++)
+   {
+      int j;
+      for (j=0;j<4;j++)
+      {
+         if (Fs == static_mode_list[i]->Fs &&
+               (frame_size<<j) == static_mode_list[i]->shortMdctSize*static_mode_list[i]->nbShortMdcts)
+         {
+            if (error)
+               *error = OPUS_OK;
+            return (CELTMode*)static_mode_list[i];
+         }
+      }
+   }
+#endif /* CUSTOM_MODES_ONLY */
+
+#ifndef CUSTOM_MODES
+   if (error)
+      *error = OPUS_BAD_ARG;
+   return NULL;
+#else
+
+   /* The good thing here is that permutation of the arguments will automatically be invalid */
+
+   if (Fs < 8000 || Fs > 96000)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   if (frame_size < 40 || frame_size > 1024 || frame_size%2!=0)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   /* Frames of less than 1ms are not supported. */
+   if ((opus_int32)frame_size*1000 < Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   if ((opus_int32)frame_size*75 >= Fs && (frame_size%16)==0)
+   {
+     LM = 3;
+   } else if ((opus_int32)frame_size*150 >= Fs && (frame_size%8)==0)
+   {
+     LM = 2;
+   } else if ((opus_int32)frame_size*300 >= Fs && (frame_size%4)==0)
+   {
+     LM = 1;
+   } else
+   {
+     LM = 0;
+   }
+
+   /* Shorts longer than 3.3ms are not supported. */
+   if ((opus_int32)(frame_size>>LM)*300 > Fs)
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+
+   mode = opus_alloc(sizeof(CELTMode));
+   if (mode==NULL)
+      goto failure;
+   mode->Fs = Fs;
+
+   /* Pre/de-emphasis depends on sampling rate. The "standard" pre-emphasis
+      is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should
+      approximate that. */
+   if(Fs < 12000) /* 8 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.3500061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] =  QCONST16(0.2719968125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(3.6765136719f, 13);
+   } else if(Fs < 24000) /* 16 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.6000061035f, 15);
+      mode->preemph[1] = -QCONST16(0.1799926758f, 15);
+      mode->preemph[2] =  QCONST16(0.4424998650f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(2.2598876953f, 13);
+   } else if(Fs < 40000) /* 32 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.7799987793f, 15);
+      mode->preemph[1] = -QCONST16(0.1000061035f, 15);
+      mode->preemph[2] =  QCONST16(0.7499771125f, SIG_SHIFT); /* exact 1/preemph[3] */
+      mode->preemph[3] =  QCONST16(1.3333740234f, 13);
+   } else /* 48 kHz */
+   {
+      mode->preemph[0] =  QCONST16(0.8500061035f, 15);
+      mode->preemph[1] =  QCONST16(0.0f, 15);
+      mode->preemph[2] =  QCONST16(1.f, SIG_SHIFT);
+      mode->preemph[3] =  QCONST16(1.f, 13);
+   }
+
+   mode->maxLM = LM;
+   mode->nbShortMdcts = 1<<LM;
+   mode->shortMdctSize = frame_size/mode->nbShortMdcts;
+   res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize);
+
+   mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands);
+   if (mode->eBands==NULL)
+      goto failure;
+#if !defined(SMALL_FOOTPRINT)
+   /* Make sure we don't allocate a band larger than our PVQ table.
+      208 should be enough, but let's be paranoid. */
+   if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM >
+    208) {
+       goto failure;
+   }
+#endif
+
+   mode->effEBands = mode->nbEBands;
+   while (mode->eBands[mode->effEBands] > mode->shortMdctSize)
+      mode->effEBands--;
+
+   /* Overlap must be divisible by 4 */
+   mode->overlap = ((mode->shortMdctSize>>2)<<2);
+
+   compute_allocation_table(mode);
+   if (mode->allocVectors==NULL)
+      goto failure;
+
+   window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16));
+   if (window==NULL)
+      goto failure;
+
+#ifndef OPUS_FIXED_POINT
+   for (i=0;i<mode->overlap;i++)
+      window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap));
+#else
+   for (i=0;i<mode->overlap;i++)
+      window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap))));
+#endif
+   mode->window = window;
+
+   logN = (opus_int16*)opus_alloc(mode->nbEBands*sizeof(opus_int16));
+   if (logN==NULL)
+      goto failure;
+
+   for (i=0;i<mode->nbEBands;i++)
+      logN[i] = log2_frac(mode->eBands[i+1]-mode->eBands[i], BITRES);
+   mode->logN = logN;
+
+   compute_pulse_cache(mode, mode->maxLM);
+
+   if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
+           mode->maxLM) == 0)
+      goto failure;
+
+   if (error)
+      *error = OPUS_OK;
+
+   return mode;
+failure:
+   if (error)
+      *error = OPUS_ALLOC_FAIL;
+   if (mode!=NULL)
+      opus_custom_mode_destroy(mode);
+   return NULL;
+#endif /* !CUSTOM_MODES */
+}
+
+#ifdef CUSTOM_MODES
+void opus_custom_mode_destroy(CELTMode *mode)
+{
+   if (mode == NULL)
+      return;
+#ifndef CUSTOM_MODES_ONLY
+   {
+     int i;
+     for (i=0;i<TOTAL_MODES;i++)
+     {
+        if (mode == static_mode_list[i])
+        {
+           return;
+        }
+     }
+   }
+#endif /* CUSTOM_MODES_ONLY */
+   opus_free((opus_int16*)mode->eBands);
+   opus_free((opus_int16*)mode->allocVectors);
+
+   opus_free((opus_val16*)mode->window);
+   opus_free((opus_int16*)mode->logN);
+
+   opus_free((opus_int16*)mode->cache.index);
+   opus_free((unsigned char*)mode->cache.bits);
+   opus_free((unsigned char*)mode->cache.caps);
+   clt_mdct_clear(&mode->mdct);
+
+   opus_free((CELTMode *)mode);
+}
+#endif
diff --git a/drivers/opus/celt/opus_custom_demo.c b/drivers/opus/celt/opus_custom_demo.c
new file mode 100644
index 0000000000..8c7f58b6e6
--- /dev/null
+++ b/drivers/opus/celt/opus_custom_demo.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_custom.h"
+#include "arch.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define MAX_PACKET 1275
+
+int main(int argc, char *argv[])
+{
+   int err;
+   char *inFile, *outFile;
+   FILE *fin, *fout;
+   OpusCustomMode *mode=NULL;
+   OpusCustomEncoder *enc;
+   OpusCustomDecoder *dec;
+   int len;
+   opus_int32 frame_size, channels, rate;
+   int bytes_per_packet;
+   unsigned char data[MAX_PACKET];
+   int complexity;
+#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   int i;
+   double rmsd = 0;
+#endif
+   int count = 0;
+   opus_int32 skip;
+   opus_int16 *in, *out;
+   if (argc != 9 && argc != 8 && argc != 7)
+   {
+      fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> "
+               " <bytes per packet> [<complexity> [packet loss rate]] "
+               "<input> <output>\n");
+      return 1;
+   }
+
+   rate = (opus_int32)atol(argv[1]);
+   channels = atoi(argv[2]);
+   frame_size = atoi(argv[3]);
+   mode = opus_custom_mode_create(rate, frame_size, NULL);
+   if (mode == NULL)
+   {
+      fprintf(stderr, "failed to create a mode\n");
+      return 1;
+   }
+
+   bytes_per_packet = atoi(argv[4]);
+   if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
+   {
+      fprintf (stderr, "bytes per packet must be between 0 and %d\n",
+                        MAX_PACKET);
+      return 1;
+   }
+
+   inFile = argv[argc-2];
+   fin = fopen(inFile, "rb");
+   if (!fin)
+   {
+      fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
+      return 1;
+   }
+   outFile = argv[argc-1];
+   fout = fopen(outFile, "wb+");
+   if (!fout)
+   {
+      fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
+      fclose(fin);
+      return 1;
+   }
+
+   enc = opus_custom_encoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      return 1;
+   }
+   dec = opus_custom_decoder_create(mode, channels, &err);
+   if (err != 0)
+   {
+      fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err));
+      fclose(fin);
+      fclose(fout);
+      return 1;
+   }
+   opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip));
+
+   if (argc>7)
+   {
+      complexity=atoi(argv[5]);
+      opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity));
+   }
+
+   in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+   out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
+
+   while (!feof(fin))
+   {
+      int ret;
+      err = fread(in, sizeof(short), frame_size*channels, fin);
+      if (feof(fin))
+         break;
+      len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet);
+      if (len <= 0)
+         fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len));
+
+      /* This is for simulating bit errors */
+#if 0
+      int errors = 0;
+      int eid = 0;
+      /* This simulates random bit error */
+      for (i=0;i<len*8;i++)
+      {
+         if (rand()%atoi(argv[8])==0)
+         {
+            if (i<64)
+            {
+               errors++;
+               eid = i;
+            }
+            data[i/8] ^= 1<<(7-(i%8));
+         }
+      }
+      if (errors == 1)
+         data[eid/8] ^= 1<<(7-(eid%8));
+      else if (errors%2 == 1)
+         data[rand()%8] ^= 1<<rand()%8;
+#endif
+
+#if 1 /* Set to zero to use the encoder's output instead */
+      /* This is to simulate packet loss */
+      if (argc==9 && rand()%1000<atoi(argv[argc-3]))
+      /*if (errors && (errors%2==0))*/
+         ret = opus_custom_decode(dec, NULL, len, out, frame_size);
+      else
+         ret = opus_custom_decode(dec, data, len, out, frame_size);
+      if (ret < 0)
+         fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret));
+#else
+      for (i=0;i<ret*channels;i++)
+         out[i] = in[i];
+#endif
+#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+      for (i=0;i<ret*channels;i++)
+      {
+         rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]);
+         /*out[i] -= in[i];*/
+      }
+#endif
+      count++;
+      fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout);
+      skip = 0;
+   }
+   PRINT_MIPS(stderr);
+
+   opus_custom_encoder_destroy(enc);
+   opus_custom_decoder_destroy(dec);
+   fclose(fin);
+   fclose(fout);
+   opus_custom_mode_destroy(mode);
+   free(in);
+   free(out);
+#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
+   if (rmsd > 0)
+   {
+      rmsd = sqrt(rmsd/(1.0*frame_size*channels*count));
+      fprintf (stderr, "Error: encoder doesn't match decoder\n");
+      fprintf (stderr, "RMS mismatch is %f\n", rmsd);
+      return 1;
+   } else {
+      fprintf (stderr, "Encoder matches decoder!!\n");
+   }
+#endif
+   return 0;
+}
+
diff --git a/drivers/opus/celt/opus_modes.h b/drivers/opus/celt/opus_modes.h
new file mode 100644
index 0000000000..a1df46265e
--- /dev/null
+++ b/drivers/opus/celt/opus_modes.h
@@ -0,0 +1,83 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OPUS_MODES_H
+#define OPUS_MODES_H
+
+#include "opus_types.h"
+#include "celt.h"
+#include "arch.h"
+#include "mdct.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#define MAX_PERIOD 1024
+
+#ifndef OVERLAP
+#define OVERLAP(mode) ((mode)->overlap)
+#endif
+
+#ifndef FRAMESIZE
+#define FRAMESIZE(mode) ((mode)->mdctSize)
+#endif
+
+typedef struct {
+   int size;
+   const opus_int16 *index;
+   const unsigned char *bits;
+   const unsigned char *caps;
+} PulseCache;
+
+/** Mode definition (opaque)
+ @brief Mode definition
+ */
+struct OpusCustomMode {
+   opus_int32 Fs;
+   int          overlap;
+
+   int          nbEBands;
+   int          effEBands;
+   opus_val16    preemph[4];
+   const opus_int16   *eBands;   /**< Definition for each "pseudo-critical band" */
+
+   int         maxLM;
+   int         nbShortMdcts;
+   int         shortMdctSize;
+
+   int          nbAllocVectors; /**< Number of lines in the matrix below */
+   const unsigned char   *allocVectors;   /**< Number of bits in each band for several rates */
+   const opus_int16 *logN;
+
+   const opus_val16 *window;
+   celt_mdct_lookup mdct;
+   PulseCache cache;
+};
+
+
+#endif
diff --git a/drivers/opus/celt/os_support.h b/drivers/opus/celt/os_support.h
new file mode 100644
index 0000000000..5e47e3cff9
--- /dev/null
+++ b/drivers/opus/celt/os_support.h
@@ -0,0 +1,92 @@
+/* Copyright (C) 2007 Jean-Marc Valin
+
+   File: os_support.h
+   This is the (tiny) OS abstraction layer. Aside from math.h, this is the
+   only place where system headers are allowed.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OS_SUPPORT_H
+#define OS_SUPPORT_H
+
+#ifdef CUSTOM_SUPPORT
+#  include "custom_support.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
+#ifndef OVERRIDE_OPUS_ALLOC
+static OPUS_INLINE void *opus_alloc (size_t size)
+{
+   return malloc(size);
+}
+#endif
+
+/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
+#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
+static OPUS_INLINE void *opus_alloc_scratch (size_t size)
+{
+   /* Scratch space doesn't need to be cleared */
+   return opus_alloc(size);
+}
+#endif
+
+/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
+#ifndef OVERRIDE_OPUS_FREE
+static OPUS_INLINE void opus_free (void *ptr)
+{
+   free(ptr);
+}
+#endif
+
+/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking  */
+#ifndef OVERRIDE_OPUS_COPY
+#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term
+    provides compile-time type checking */
+#ifndef OVERRIDE_OPUS_MOVE
+#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Set n elements of dst to zero, starting at address s */
+#ifndef OVERRIDE_OPUS_CLEAR
+#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
+#endif
+
+/*#ifdef __GNUC__
+#pragma GCC poison printf sprintf
+#pragma GCC poison malloc free realloc calloc
+#endif*/
+
+#endif /* OS_SUPPORT_H */
+
diff --git a/drivers/opus/celt/pitch.c b/drivers/opus/celt/pitch.c
new file mode 100644
index 0000000000..48cd02fb2b
--- /dev/null
+++ b/drivers/opus/celt/pitch.c
@@ -0,0 +1,537 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.c
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "pitch.h"
+#include "os_support.h"
+#include "opus_modes.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "celt_lpc.h"
+
+static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
+                            int max_pitch, int *best_pitch
+#ifdef OPUS_FIXED_POINT
+                            , int yshift, opus_val32 maxcorr
+#endif
+                            )
+{
+   int i, j;
+   opus_val32 Syy=1;
+   opus_val16 best_num[2];
+   opus_val32 best_den[2];
+#ifdef OPUS_FIXED_POINT
+   int xshift;
+
+   xshift = celt_ilog2(maxcorr)-14;
+#endif
+
+   best_num[0] = -1;
+   best_num[1] = -1;
+   best_den[0] = 0;
+   best_den[1] = 0;
+   best_pitch[0] = 0;
+   best_pitch[1] = 1;
+   for (j=0;j<len;j++)
+      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
+   for (i=0;i<max_pitch;i++)
+   {
+      if (xcorr[i]>0)
+      {
+         opus_val16 num;
+         opus_val32 xcorr16;
+         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
+#ifndef OPUS_FIXED_POINT
+         /* Considering the range of xcorr16, this should avoid both underflows
+            and overflows (inf) when squaring xcorr16 */
+         xcorr16 *= 1e-12f;
+#endif
+         num = MULT16_16_Q15(xcorr16,xcorr16);
+         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
+         {
+            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
+            {
+               best_num[1] = best_num[0];
+               best_den[1] = best_den[0];
+               best_pitch[1] = best_pitch[0];
+               best_num[0] = num;
+               best_den[0] = Syy;
+               best_pitch[0] = i;
+            } else {
+               best_num[1] = num;
+               best_den[1] = Syy;
+               best_pitch[1] = i;
+            }
+         }
+      }
+      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
+      Syy = MAX32(1, Syy);
+   }
+}
+
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   int i;
+   opus_val16 num0, num1, num2, num3, num4;
+   opus_val32 mem0, mem1, mem2, mem3, mem4;
+   num0=num[0];
+   num1=num[1];
+   num2=num[2];
+   num3=num[3];
+   num4=num[4];
+   mem0=mem[0];
+   mem1=mem[1];
+   mem2=mem[2];
+   mem3=mem[3];
+   mem4=mem[4];
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      sum = MAC16_16(sum,num0,mem0);
+      sum = MAC16_16(sum,num1,mem1);
+      sum = MAC16_16(sum,num2,mem2);
+      sum = MAC16_16(sum,num3,mem3);
+      sum = MAC16_16(sum,num4,mem4);
+      mem4 = mem3;
+      mem3 = mem2;
+      mem2 = mem1;
+      mem1 = mem0;
+      mem0 = x[i];
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+   mem[0]=mem0;
+   mem[1]=mem1;
+   mem[2]=mem2;
+   mem[3]=mem3;
+   mem[4]=mem4;
+}
+
+
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+      int len, int C, int arch)
+{
+   int i;
+   opus_val32 ac[5];
+   opus_val16 tmp=Q15ONE;
+   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+   opus_val16 lpc2[5];
+   opus_val16 c1 = QCONST16(.8f,15);
+#ifdef OPUS_FIXED_POINT
+   int shift;
+   opus_val32 maxabs = celt_maxabs32(x[0], len);
+   if (C==2)
+   {
+      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
+      maxabs = MAX32(maxabs, maxabs_1);
+   }
+   if (maxabs<1)
+      maxabs=1;
+   shift = celt_ilog2(maxabs)-10;
+   if (shift<0)
+      shift=0;
+   if (C==2)
+      shift++;
+#endif
+   for (i=1;i<len>>1;i++)
+      x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
+   x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
+   if (C==2)
+   {
+      for (i=1;i<len>>1;i++)
+         x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
+      x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
+   }
+
+   _celt_autocorr(x_lp, ac, NULL, 0,
+                  4, len>>1, arch);
+
+   /* Noise floor -40 dB */
+#ifdef OPUS_FIXED_POINT
+   ac[0] += SHR32(ac[0],13);
+#else
+   ac[0] *= 1.0001f;
+#endif
+   /* Lag windowing */
+   for (i=1;i<=4;i++)
+   {
+      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef OPUS_FIXED_POINT
+      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
+#endif
+   }
+
+   _celt_lpc(lpc, ac, 4);
+   for (i=0;i<4;i++)
+   {
+      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
+      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
+   }
+   /* Add a zero */
+   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+}
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+         well on DSPs like Blackfin and TI C5x/C6x */
+
+#ifdef OPUS_FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i, j;
+#ifdef OPUS_FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, x[j],y[i+j]);
+      xcorr[i] = sum;
+#ifdef OPUS_FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef OPUS_FIXED_POINT
+   return maxcorr;
+#endif
+}
+
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+
+#ifdef OPUS_FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i,j;
+   /*The EDSP version requires that max_pitch is at least 1, and that _x is
+      32-bit aligned.
+     Since it's hard to put asserts in assembly, put them here.*/
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
+#ifdef OPUS_FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef OPUS_FIXED_POINT
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      xcorr[i] = sum;
+#ifdef OPUS_FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef OPUS_FIXED_POINT
+   return maxcorr;
+#endif
+}
+
+#endif
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+                  int len, int max_pitch, int *pitch, int arch)
+{
+   int i, j;
+   int lag;
+   int best_pitch[2]={0,0};
+   VARDECL(opus_val16, x_lp4);
+   VARDECL(opus_val16, y_lp4);
+   VARDECL(opus_val32, xcorr);
+#ifdef OPUS_FIXED_POINT
+   opus_val32 maxcorr;
+   opus_val32 xmax, ymax;
+   int shift=0;
+#endif
+   int offset;
+
+   SAVE_STACK;
+
+   celt_assert(len>0);
+   celt_assert(max_pitch>0);
+   lag = len+max_pitch;
+
+   ALLOC(x_lp4, len>>2, opus_val16);
+   ALLOC(y_lp4, lag>>2, opus_val16);
+   ALLOC(xcorr, max_pitch>>1, opus_val32);
+
+   /* Downsample by 2 again */
+   for (j=0;j<len>>2;j++)
+      x_lp4[j] = x_lp[2*j];
+   for (j=0;j<lag>>2;j++)
+      y_lp4[j] = y[2*j];
+
+#ifdef OPUS_FIXED_POINT
+   xmax = celt_maxabs16(x_lp4, len>>2);
+   ymax = celt_maxabs16(y_lp4, lag>>2);
+   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
+   if (shift>0)
+   {
+      for (j=0;j<len>>2;j++)
+         x_lp4[j] = SHR16(x_lp4[j], shift);
+      for (j=0;j<lag>>2;j++)
+         y_lp4[j] = SHR16(y_lp4[j], shift);
+      /* Use double the shift for a MAC */
+      shift *= 2;
+   } else {
+      shift = 0;
+   }
+#endif
+
+   /* Coarse search with 4x decimation */
+
+#ifdef OPUS_FIXED_POINT
+   maxcorr =
+#endif
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
+
+   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
+#ifdef OPUS_FIXED_POINT
+                   , 0, maxcorr
+#endif
+                   );
+
+   /* Finer search with 2x decimation */
+#ifdef OPUS_FIXED_POINT
+   maxcorr=1;
+#endif
+   for (i=0;i<max_pitch>>1;i++)
+   {
+      opus_val32 sum=0;
+      xcorr[i] = 0;
+      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
+         continue;
+      for (j=0;j<len>>1;j++)
+         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+      xcorr[i] = MAX32(-1, sum);
+#ifdef OPUS_FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
+#ifdef OPUS_FIXED_POINT
+                   , shift+1, maxcorr
+#endif
+                   );
+
+   /* Refine by pseudo-interpolation */
+   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
+   {
+      opus_val32 a, b, c;
+      a = xcorr[best_pitch[0]-1];
+      b = xcorr[best_pitch[0]];
+      c = xcorr[best_pitch[0]+1];
+      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
+         offset = 1;
+      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
+         offset = -1;
+      else
+         offset = 0;
+   } else {
+      offset = 0;
+   }
+   *pitch = 2*best_pitch[0]-offset;
+
+   RESTORE_STACK;
+}
+
+static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0_, int prev_period, opus_val16 prev_gain)
+{
+   int k, i, T, T0;
+   opus_val16 g, g0;
+   opus_val16 pg;
+   opus_val32 xy,xx,yy,xy2;
+   opus_val32 xcorr[3];
+   opus_val32 best_xy, best_yy;
+   int offset;
+   int minperiod0;
+   VARDECL(opus_val32, yy_lookup);
+   SAVE_STACK;
+
+   minperiod0 = minperiod;
+   maxperiod /= 2;
+   minperiod /= 2;
+   *T0_ /= 2;
+   prev_period /= 2;
+   N /= 2;
+   x += maxperiod;
+   if (*T0_>=maxperiod)
+      *T0_=maxperiod-1;
+
+   T = T0 = *T0_;
+   ALLOC(yy_lookup, maxperiod+1, opus_val32);
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+   yy_lookup[0] = xx;
+   yy=xx;
+   for (i=1;i<=maxperiod;i++)
+   {
+      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
+      yy_lookup[i] = MAX32(0, yy);
+   }
+   yy = yy_lookup[T0];
+   best_xy = xy;
+   best_yy = yy;
+#ifdef OPUS_FIXED_POINT
+      {
+         opus_val32 x2y2;
+         int sh, t;
+         x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
+         sh = celt_ilog2(x2y2)>>1;
+         t = VSHR32(x2y2, 2*(sh-7));
+         g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
+      }
+#else
+      g = g0 = xy/celt_sqrt(1+xx*yy);
+#endif
+   /* Look for any pitch at T/k */
+   for (k=2;k<=15;k++)
+   {
+      int T1, T1b;
+      opus_val16 g1;
+      opus_val16 cont=0;
+      opus_val16 thresh;
+      T1 = (2*T0+k)/(2*k);
+      if (T1 < minperiod)
+         break;
+      /* Look for another strong correlation at T1b */
+      if (k==2)
+      {
+         if (T1+T0>maxperiod)
+            T1b = T0;
+         else
+            T1b = T0+T1;
+      } else
+      {
+         T1b = (2*second_check[k]*T0+k)/(2*k);
+      }
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
+      xy += xy2;
+      yy = yy_lookup[T1] + yy_lookup[T1b];
+#ifdef OPUS_FIXED_POINT
+      {
+         opus_val32 x2y2;
+         int sh, t;
+         x2y2 = 1+MULT32_32_Q31(xx,yy);
+         sh = celt_ilog2(x2y2)>>1;
+         t = VSHR32(x2y2, 2*(sh-7));
+         g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
+      }
+#else
+      g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
+#endif
+      if (abs(T1-prev_period)<=1)
+         cont = prev_gain;
+      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
+         cont = HALF32(prev_gain);
+      else
+         cont = 0;
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+      if (g1 > thresh)
+      {
+         best_xy = xy;
+         best_yy = yy;
+         T = T1;
+         g = g1;
+      }
+   }
+   best_xy = MAX32(0, best_xy);
+   if (best_yy <= best_xy)
+      pg = Q15ONE;
+   else
+      pg = SHR32(frac_div32(best_xy,best_yy+1),16);
+
+   for (k=0;k<3;k++)
+   {
+      int T1 = T+k-1;
+      xy = 0;
+      for (i=0;i<N;i++)
+         xy = MAC16_16(xy, x[i], x[i-T1]);
+      xcorr[k] = xy;
+   }
+   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
+      offset = 1;
+   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
+      offset = -1;
+   else
+      offset = 0;
+   if (pg > g)
+      pg = g;
+   *T0_ = 2*T+offset;
+
+   if (*T0_<minperiod0)
+      *T0_=minperiod0;
+   RESTORE_STACK;
+   return pg;
+}
diff --git a/drivers/opus/celt/pitch.h b/drivers/opus/celt/pitch.h
new file mode 100644
index 0000000000..3a7d305425
--- /dev/null
+++ b/drivers/opus/celt/pitch.h
@@ -0,0 +1,173 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file pitch.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_H
+#define PITCH_H
+
+#include "opus_modes.h"
+#include "cpu_support.h"
+
+#if defined(__SSE__) && !defined(OPUS_FIXED_POINT)
+#include "x86/pitch_sse.h"
+#endif
+
+#if defined(OPUS_ARM_ASM) && defined(OPUS_FIXED_POINT)
+# include "arm/pitch_arm.h"
+#endif
+
+void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
+      int len, int C, int arch);
+
+void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
+                  int len, int max_pitch, int *pitch, int arch);
+
+opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
+      int N, int *T0, int prev_period, opus_val16 prev_gain);
+
+/* OPT: This is the kernel you really want to optimize. It gets used a lot
+   by the prefilter and by the PLC. */
+#ifndef OVERRIDE_XCORR_KERNEL
+static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+   celt_assert(len>=3);
+   y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+   y_0=*y++;
+   y_1=*y++;
+   y_2=*y++;
+   for (j=0;j<len-3;j+=4)
+   {
+      opus_val16 tmp;
+      tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+      tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+      tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+      tmp=*x++;
+      y_2=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_3);
+      sum[1] = MAC16_16(sum[1],tmp,y_0);
+      sum[2] = MAC16_16(sum[2],tmp,y_1);
+      sum[3] = MAC16_16(sum[3],tmp,y_2);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+   }
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+   }
+}
+#endif /* OVERRIDE_XCORR_KERNEL */
+
+#ifndef OVERRIDE_DUAL_INNER_PROD
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+   for (i=0;i<N;i++)
+   {
+      xy01 = MAC16_16(xy01, x[i], y01[i]);
+      xy02 = MAC16_16(xy02, x[i], y02[i]);
+   }
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+#endif
+
+#ifdef OPUS_FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch);
+
+#if !defined(OVERRIDE_PITCH_XCORR)
+/*Is run-time CPU detection enabled on this platform?*/
+# if defined(OPUS_HAVE_RTCD)
+extern
+#  if defined(OPUS_FIXED_POINT)
+opus_val32
+#  else
+void
+#  endif
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+# else
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
+# endif
+#endif
+
+#endif
diff --git a/drivers/opus/celt/quant_bands.c b/drivers/opus/celt/quant_bands.c
new file mode 100644
index 0000000000..0a170e850d
--- /dev/null
+++ b/drivers/opus/celt/quant_bands.c
@@ -0,0 +1,556 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "quant_bands.h"
+#include "laplace.h"
+#include <math.h>
+#include "os_support.h"
+#include "arch.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+#include "rate.h"
+
+#ifdef OPUS_FIXED_POINT
+/* Mean energy in each band quantized in Q4 */
+const signed char eMeans[25] = {
+      103,100, 92, 85, 81,
+       77, 72, 70, 78, 75,
+       73, 71, 78, 74, 69,
+       72, 70, 74, 76, 71,
+       60, 60, 60, 60, 60
+};
+#else
+/* Mean energy in each band quantized in Q4 and converted back to float */
+const opus_val16 eMeans[25] = {
+      6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
+      4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
+      4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
+      4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f,
+      3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f
+};
+#endif
+/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */
+#ifdef OPUS_FIXED_POINT
+static const opus_val16 pred_coef[4] = {29440, 26112, 21248, 16384};
+static const opus_val16 beta_coef[4] = {30147, 22282, 12124, 6554};
+static const opus_val16 beta_intra = 4915;
+#else
+static const opus_val16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.};
+static const opus_val16 beta_coef[4] = {30147/32768., 22282/32768., 12124/32768., 6554/32768.};
+static const opus_val16 beta_intra = 4915/32768.;
+#endif
+
+/*Parameters of the Laplace-like probability models used for the coarse energy.
+  There is one pair of parameters for each frame size, prediction type
+   (inter/intra), and band number.
+  The first number of each pair is the probability of 0, and the second is the
+   decay rate, both in Q8 precision.*/
+static const unsigned char e_prob_model[4][2][42] = {
+   /*120 sample frames.*/
+   {
+      /*Inter*/
+      {
+          72, 127,  65, 129,  66, 128,  65, 128,  64, 128,  62, 128,  64, 128,
+          64, 128,  92,  78,  92,  79,  92,  78,  90,  79, 116,  41, 115,  40,
+         114,  40, 132,  26, 132,  26, 145,  17, 161,  12, 176,  10, 177,  11
+      },
+      /*Intra*/
+      {
+          24, 179,  48, 138,  54, 135,  54, 132,  53, 134,  56, 133,  55, 132,
+          55, 132,  61, 114,  70,  96,  74,  88,  75,  88,  87,  74,  89,  66,
+          91,  67, 100,  59, 108,  50, 120,  40, 122,  37,  97,  43,  78,  50
+      }
+   },
+   /*240 sample frames.*/
+   {
+      /*Inter*/
+      {
+          83,  78,  84,  81,  88,  75,  86,  74,  87,  71,  90,  73,  93,  74,
+          93,  74, 109,  40, 114,  36, 117,  34, 117,  34, 143,  17, 145,  18,
+         146,  19, 162,  12, 165,  10, 178,   7, 189,   6, 190,   8, 177,   9
+      },
+      /*Intra*/
+      {
+          23, 178,  54, 115,  63, 102,  66,  98,  69,  99,  74,  89,  71,  91,
+          73,  91,  78,  89,  86,  80,  92,  66,  93,  64, 102,  59, 103,  60,
+         104,  60, 117,  52, 123,  44, 138,  35, 133,  31,  97,  38,  77,  45
+      }
+   },
+   /*480 sample frames.*/
+   {
+      /*Inter*/
+      {
+          61,  90,  93,  60, 105,  42, 107,  41, 110,  45, 116,  38, 113,  38,
+         112,  38, 124,  26, 132,  27, 136,  19, 140,  20, 155,  14, 159,  16,
+         158,  18, 170,  13, 177,  10, 187,   8, 192,   6, 175,   9, 159,  10
+      },
+      /*Intra*/
+      {
+          21, 178,  59, 110,  71,  86,  75,  85,  84,  83,  91,  66,  88,  73,
+          87,  72,  92,  75,  98,  72, 105,  58, 107,  54, 115,  52, 114,  55,
+         112,  56, 129,  51, 132,  40, 150,  33, 140,  29,  98,  35,  77,  42
+      }
+   },
+   /*960 sample frames.*/
+   {
+      /*Inter*/
+      {
+          42, 121,  96,  66, 108,  43, 111,  40, 117,  44, 123,  32, 120,  36,
+         119,  33, 127,  33, 134,  34, 139,  21, 147,  23, 152,  20, 158,  25,
+         154,  26, 166,  21, 173,  16, 184,  13, 184,  10, 150,  13, 139,  15
+      },
+      /*Intra*/
+      {
+          22, 178,  63, 114,  74,  82,  84,  83,  92,  82, 103,  62,  96,  72,
+          96,  67, 101,  73, 107,  72, 113,  55, 118,  52, 125,  52, 118,  52,
+         117,  55, 135,  49, 137,  39, 157,  32, 145,  29,  97,  33,  77,  40
+      }
+   }
+};
+
+static const unsigned char small_energy_icdf[3]={2,1,0};
+
+static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C)
+{
+   int c, i;
+   opus_val32 dist = 0;
+   c=0; do {
+      for (i=start;i<end;i++)
+      {
+         opus_val16 d = SUB16(SHR16(eBands[i+c*len], 3), SHR16(oldEBands[i+c*len], 3));
+         dist = MAC16_16(dist, d,d);
+      }
+   } while (++c<C);
+   return MIN32(200,SHR32(dist,2*DB_SHIFT-6));
+}
+
+static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
+      const opus_val16 *eBands, opus_val16 *oldEBands,
+      opus_int32 budget, opus_int32 tell,
+      const unsigned char *prob_model, opus_val16 *error, ec_enc *enc,
+      int C, int LM, int intra, opus_val16 max_decay, int lfe)
+{
+   int i, c;
+   int badness = 0;
+   opus_val32 prev[2] = {0,0};
+   opus_val16 coef;
+   opus_val16 beta;
+
+   if (tell+3 <= budget)
+      ec_enc_bit_logp(enc, intra, 3);
+   if (intra)
+   {
+      coef = 0;
+      beta = beta_intra;
+   } else {
+      beta = beta_coef[LM];
+      coef = pred_coef[LM];
+   }
+
+   /* Encode at a fixed coarse resolution */
+   for (i=start;i<end;i++)
+   {
+      c=0;
+      do {
+         int bits_left;
+         int qi, qi0;
+         opus_val32 q;
+         opus_val16 x;
+         opus_val32 f, tmp;
+         opus_val16 oldE;
+         opus_val16 decay_bound;
+         x = eBands[i+c*m->nbEBands];
+         oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
+#ifdef OPUS_FIXED_POINT
+         f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c];
+         /* Rounding to nearest integer here is really important! */
+         qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7);
+         decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT),
+               SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay)));
+#else
+         f = x-coef*oldE-prev[c];
+         /* Rounding to nearest integer here is really important! */
+         qi = (int)floor(.5f+f);
+         decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay;
+#endif
+         /* Prevent the energy from going down too quickly (e.g. for bands
+            that have just one bin) */
+         if (qi < 0 && x < decay_bound)
+         {
+            qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT);
+            if (qi > 0)
+               qi = 0;
+         }
+         qi0 = qi;
+         /* If we don't have enough bits to encode all the energy, just assume
+             something safe. */
+         tell = ec_tell(enc);
+         bits_left = budget-tell-3*C*(end-i);
+         if (i!=start && bits_left < 30)
+         {
+            if (bits_left < 24)
+               qi = IMIN(1, qi);
+            if (bits_left < 16)
+               qi = IMAX(-1, qi);
+         }
+         if (lfe && i>=2)
+            qi = IMIN(qi, 0);
+         if (budget-tell >= 15)
+         {
+            int pi;
+            pi = 2*IMIN(i,20);
+            ec_laplace_encode(enc, &qi,
+                  prob_model[pi]<<7, prob_model[pi+1]<<6);
+         }
+         else if(budget-tell >= 2)
+         {
+            qi = IMAX(-1, IMIN(qi, 1));
+            ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2);
+         }
+         else if(budget-tell >= 1)
+         {
+            qi = IMIN(0, qi);
+            ec_enc_bit_logp(enc, -qi, 1);
+         }
+         else
+            qi = -1;
+         error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT);
+         badness += abs(qi0-qi);
+         q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+         tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7);
+#ifdef OPUS_FIXED_POINT
+         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
+      } while (++c < C);
+   }
+   return lfe ? 0 : badness;
+}
+
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+      opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
+      int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe)
+{
+   int intra;
+   opus_val16 max_decay;
+   VARDECL(opus_val16, oldEBands_intra);
+   VARDECL(opus_val16, error_intra);
+   ec_enc enc_start_state;
+   opus_uint32 tell;
+   int badness1=0;
+   opus_int32 intra_bias;
+   opus_val32 new_distortion;
+   SAVE_STACK;
+
+   intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C);
+   intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512));
+   new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C);
+
+   tell = ec_tell(enc);
+   if (tell+3 > budget)
+      two_pass = intra = 0;
+
+   max_decay = QCONST16(16.f,DB_SHIFT);
+   if (end-start>10)
+   {
+#ifdef OPUS_FIXED_POINT
+      max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
+#else
+      max_decay = MIN32(max_decay, .125f*nbAvailableBytes);
+#endif
+   }
+   if (lfe)
+      max_decay=3;
+   enc_start_state = *enc;
+
+   ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
+   ALLOC(error_intra, C*m->nbEBands, opus_val16);
+   OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands);
+
+   if (two_pass || intra)
+   {
+      badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget,
+            tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe);
+   }
+
+   if (!intra)
+   {
+      unsigned char *intra_buf;
+      ec_enc enc_intra_state;
+      opus_int32 tell_intra;
+      opus_uint32 nstart_bytes;
+      opus_uint32 nintra_bytes;
+      opus_uint32 save_bytes;
+      int badness2;
+      VARDECL(unsigned char, intra_bits);
+
+      tell_intra = ec_tell_frac(enc);
+
+      enc_intra_state = *enc;
+
+      nstart_bytes = ec_range_bytes(&enc_start_state);
+      nintra_bytes = ec_range_bytes(&enc_intra_state);
+      intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes;
+      save_bytes = nintra_bytes-nstart_bytes;
+      if (save_bytes == 0)
+         save_bytes = ALLOC_NONE;
+      ALLOC(intra_bits, save_bytes, unsigned char);
+      /* Copy bits from intra bit-stream */
+      OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes);
+
+      *enc = enc_start_state;
+
+      badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget,
+            tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
+
+      if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra)))
+      {
+         *enc = enc_intra_state;
+         /* Copy intra bits to bit-stream */
+         OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes);
+         OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+         OPUS_COPY(error, error_intra, C*m->nbEBands);
+         intra = 1;
+      }
+   } else {
+      OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands);
+      OPUS_COPY(error, error_intra, C*m->nbEBands);
+   }
+
+   if (intra)
+      *delayedIntra = new_distortion;
+   else
+      *delayedIntra = ADD32(MULT16_32_Q15(MULT16_16_Q15(pred_coef[LM], pred_coef[LM]),*delayedIntra),
+            new_distortion);
+
+   RESTORE_STACK;
+}
+
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C)
+{
+   int i, c;
+
+   /* Encode finer resolution */
+   for (i=start;i<end;i++)
+   {
+      opus_int16 frac = 1<<fine_quant[i];
+      if (fine_quant[i] <= 0)
+         continue;
+      c=0;
+      do {
+         int q2;
+         opus_val16 offset;
+#ifdef OPUS_FIXED_POINT
+         /* Has to be without rounding */
+         q2 = (error[i+c*m->nbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]);
+#else
+         q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac);
+#endif
+         if (q2 > frac-1)
+            q2 = frac-1;
+         if (q2<0)
+            q2 = 0;
+         ec_enc_bits(enc, q2, fine_quant[i]);
+#ifdef OPUS_FIXED_POINT
+         offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+         offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+         oldEBands[i+c*m->nbEBands] += offset;
+         error[i+c*m->nbEBands] -= offset;
+         /*printf ("%f ", error[i] - offset);*/
+      } while (++c < C);
+   }
+}
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C)
+{
+   int i, prio, c;
+
+   /* Use up the remaining bits */
+   for (prio=0;prio<2;prio++)
+   {
+      for (i=start;i<end && bits_left>=C ;i++)
+      {
+         if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+            continue;
+         c=0;
+         do {
+            int q2;
+            opus_val16 offset;
+            q2 = error[i+c*m->nbEBands]<0 ? 0 : 1;
+            ec_enc_bits(enc, q2, 1);
+#ifdef OPUS_FIXED_POINT
+            offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+            offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+            oldEBands[i+c*m->nbEBands] += offset;
+            bits_left--;
+         } while (++c < C);
+      }
+   }
+}
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM)
+{
+   const unsigned char *prob_model = e_prob_model[LM][intra];
+   int i, c;
+   opus_val32 prev[2] = {0, 0};
+   opus_val16 coef;
+   opus_val16 beta;
+   opus_int32 budget;
+   opus_int32 tell;
+
+   if (intra)
+   {
+      coef = 0;
+      beta = beta_intra;
+   } else {
+      beta = beta_coef[LM];
+      coef = pred_coef[LM];
+   }
+
+   budget = dec->storage*8;
+
+   /* Decode at a fixed coarse resolution */
+   for (i=start;i<end;i++)
+   {
+      c=0;
+      do {
+         int qi;
+         opus_val32 q;
+         opus_val32 tmp;
+         /* It would be better to express this invariant as a
+            test on C at function entry, but that isn't enough
+            to make the static analyzer happy. */
+         celt_assert(c<2);
+         tell = ec_tell(dec);
+         if(budget-tell>=15)
+         {
+            int pi;
+            pi = 2*IMIN(i,20);
+            qi = ec_laplace_decode(dec,
+                  prob_model[pi]<<7, prob_model[pi+1]<<6);
+         }
+         else if(budget-tell>=2)
+         {
+            qi = ec_dec_icdf(dec, small_energy_icdf, 2);
+            qi = (qi>>1)^-(qi&1);
+         }
+         else if(budget-tell>=1)
+         {
+            qi = -ec_dec_bit_logp(dec, 1);
+         }
+         else
+            qi = -1;
+         q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
+
+         oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
+         tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7);
+#ifdef OPUS_FIXED_POINT
+         tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
+#endif
+         oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
+         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
+      } while (++c < C);
+   }
+}
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C)
+{
+   int i, c;
+   /* Decode finer resolution */
+   for (i=start;i<end;i++)
+   {
+      if (fine_quant[i] <= 0)
+         continue;
+      c=0;
+      do {
+         int q2;
+         opus_val16 offset;
+         q2 = ec_dec_bits(dec, fine_quant[i]);
+#ifdef OPUS_FIXED_POINT
+         offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
+#else
+         offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
+#endif
+         oldEBands[i+c*m->nbEBands] += offset;
+      } while (++c < C);
+   }
+}
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant,  int *fine_priority, int bits_left, ec_dec *dec, int C)
+{
+   int i, prio, c;
+
+   /* Use up the remaining bits */
+   for (prio=0;prio<2;prio++)
+   {
+      for (i=start;i<end && bits_left>=C ;i++)
+      {
+         if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio)
+            continue;
+         c=0;
+         do {
+            int q2;
+            opus_val16 offset;
+            q2 = ec_dec_bits(dec, 1);
+#ifdef OPUS_FIXED_POINT
+            offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
+#else
+            offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
+#endif
+            oldEBands[i+c*m->nbEBands] += offset;
+            bits_left--;
+         } while (++c < C);
+      }
+   }
+}
+
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+      celt_ener *bandE, opus_val16 *bandLogE, int C)
+{
+   int c, i;
+   c=0;
+   do {
+      for (i=0;i<effEnd;i++)
+         bandLogE[i+c*m->nbEBands] =
+               celt_log2(SHL32(bandE[i+c*m->nbEBands],2))
+               - SHL16((opus_val16)eMeans[i],6);
+      for (i=effEnd;i<end;i++)
+         bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
+   } while (++c < C);
+}
diff --git a/drivers/opus/celt/quant_bands.h b/drivers/opus/celt/quant_bands.h
new file mode 100644
index 0000000000..840df8723f
--- /dev/null
+++ b/drivers/opus/celt/quant_bands.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef QUANT_BANDS
+#define QUANT_BANDS
+
+#include "arch.h"
+#include "opus_modes.h"
+#include "entenc.h"
+#include "entdec.h"
+#include "mathops.h"
+
+#ifdef OPUS_FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
+
+void amp2Log2(const CELTMode *m, int effEnd, int end,
+      celt_ener *bandE, opus_val16 *bandLogE, int C);
+
+void log2Amp(const CELTMode *m, int start, int end,
+      celt_ener *eBands, const opus_val16 *oldEBands, int C);
+
+void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
+      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
+      opus_val16 *error, ec_enc *enc, int C, int LM,
+      int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra,
+      int two_pass, int loss_rate, int lfe);
+
+void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C);
+
+void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C);
+
+void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM);
+
+void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C);
+
+void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C);
+
+#endif /* QUANT_BANDS */
diff --git a/drivers/opus/celt/rate.c b/drivers/opus/celt/rate.c
new file mode 100644
index 0000000000..cca585ad95
--- /dev/null
+++ b/drivers/opus/celt/rate.c
@@ -0,0 +1,638 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <math.h>
+#include "opus_modes.h"
+#include "cwrs.h"
+#include "arch.h"
+#include "os_support.h"
+
+#include "entcode.h"
+#include "rate.h"
+
+static const unsigned char LOG2_FRAC_TABLE[24]={
+   0,
+   8,13,
+  16,19,21,23,
+  24,26,27,28,29,30,31,32,
+  32,33,34,34,35,36,36,37,37
+};
+
+#ifdef CUSTOM_MODES
+
+/*Determines if V(N,K) fits in a 32-bit unsigned integer.
+  N and K are themselves limited to 15 bits.*/
+static int fits_in32(int _n, int _k)
+{
+   static const opus_int16 maxN[15] = {
+      32767, 32767, 32767, 1476, 283, 109,  60,  40,
+       29,  24,  20,  18,  16,  14,  13};
+   static const opus_int16 maxK[15] = {
+      32767, 32767, 32767, 32767, 1172, 238,  95,  53,
+       36,  27,  22,  18,  16,  15,  13};
+   if (_n>=14)
+   {
+      if (_k>=14)
+         return 0;
+      else
+         return _n <= maxN[_k];
+   } else {
+      return _k <= maxK[_n];
+   }
+}
+
+void compute_pulse_cache(CELTMode *m, int LM)
+{
+   int C;
+   int i;
+   int j;
+   int curr=0;
+   int nbEntries=0;
+   int entryN[100], entryK[100], entryI[100];
+   const opus_int16 *eBands = m->eBands;
+   PulseCache *cache = &m->cache;
+   opus_int16 *cindex;
+   unsigned char *bits;
+   unsigned char *cap;
+
+   cindex = (opus_int16 *)opus_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2));
+   cache->index = cindex;
+
+   /* Scan for all unique band sizes */
+   for (i=0;i<=LM+1;i++)
+   {
+      for (j=0;j<m->nbEBands;j++)
+      {
+         int k;
+         int N = (eBands[j+1]-eBands[j])<<i>>1;
+         cindex[i*m->nbEBands+j] = -1;
+         /* Find other bands that have the same size */
+         for (k=0;k<=i;k++)
+         {
+            int n;
+            for (n=0;n<m->nbEBands && (k!=i || n<j);n++)
+            {
+               if (N == (eBands[n+1]-eBands[n])<<k>>1)
+               {
+                  cindex[i*m->nbEBands+j] = cindex[k*m->nbEBands+n];
+                  break;
+               }
+            }
+         }
+         if (cache->index[i*m->nbEBands+j] == -1 && N!=0)
+         {
+            int K;
+            entryN[nbEntries] = N;
+            K = 0;
+            while (fits_in32(N,get_pulses(K+1)) && K<MAX_PSEUDO)
+               K++;
+            entryK[nbEntries] = K;
+            cindex[i*m->nbEBands+j] = curr;
+            entryI[nbEntries] = curr;
+
+            curr += K+1;
+            nbEntries++;
+         }
+      }
+   }
+   bits = (unsigned char *)opus_alloc(sizeof(unsigned char)*curr);
+   cache->bits = bits;
+   cache->size = curr;
+   /* Compute the cache for all unique sizes */
+   for (i=0;i<nbEntries;i++)
+   {
+      unsigned char *ptr = bits+entryI[i];
+      opus_int16 tmp[MAX_PULSES+1];
+      get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
+      for (j=1;j<=entryK[i];j++)
+         ptr[j] = tmp[get_pulses(j)]-1;
+      ptr[0] = entryK[i];
+   }
+
+   /* Compute the maximum rate for each band at which we'll reliably use as
+       many bits as we ask for. */
+   cache->caps = cap = (unsigned char *)opus_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands);
+   for (i=0;i<=LM;i++)
+   {
+      for (C=1;C<=2;C++)
+      {
+         for (j=0;j<m->nbEBands;j++)
+         {
+            int N0;
+            int max_bits;
+            N0 = m->eBands[j+1]-m->eBands[j];
+            /* N=1 bands only have a sign bit and fine bits. */
+            if (N0<<i == 1)
+               max_bits = C*(1+MAX_FINE_BITS)<<BITRES;
+            else
+            {
+               const unsigned char *pcache;
+               opus_int32           num;
+               opus_int32           den;
+               int                  LM0;
+               int                  N;
+               int                  offset;
+               int                  ndof;
+               int                  qb;
+               int                  k;
+               LM0 = 0;
+               /* Even-sized bands bigger than N=2 can be split one more time.
+                  As of commit 44203907 all bands >1 are even, including custom modes.*/
+               if (N0 > 2)
+               {
+                  N0>>=1;
+                  LM0--;
+               }
+               /* N0=1 bands can't be split down to N<2. */
+               else if (N0 <= 1)
+               {
+                  LM0=IMIN(i,1);
+                  N0<<=LM0;
+               }
+               /* Compute the cost for the lowest-level PVQ of a fully split
+                   band. */
+               pcache = bits + cindex[(LM0+1)*m->nbEBands+j];
+               max_bits = pcache[pcache[0]]+1;
+               /* Add in the cost of coding regular splits. */
+               N = N0;
+               for(k=0;k<i-LM0;k++){
+                  max_bits <<= 1;
+                  /* Offset the number of qtheta bits by log2(N)/2
+                      + QTHETA_OFFSET compared to their "fair share" of
+                      total/N */
+                  offset = ((m->logN[j]+((LM0+k)<<BITRES))>>1)-QTHETA_OFFSET;
+                  /* The number of qtheta bits we'll allocate if the remainder
+                      is to be max_bits.
+                     The average measured cost for theta is 0.89701 times qb,
+                      approximated here as 459/512. */
+                  num=459*(opus_int32)((2*N-1)*offset+max_bits);
+                  den=((opus_int32)(2*N-1)<<9)-459;
+                  qb = IMIN((num+(den>>1))/den, 57);
+                  celt_assert(qb >= 0);
+                  max_bits += qb;
+                  N <<= 1;
+               }
+               /* Add in the cost of a stereo split, if necessary. */
+               if (C==2)
+               {
+                  max_bits <<= 1;
+                  offset = ((m->logN[j]+(i<<BITRES))>>1)-(N==2?QTHETA_OFFSET_TWOPHASE:QTHETA_OFFSET);
+                  ndof = 2*N-1-(N==2);
+                  /* The average measured cost for theta with the step PDF is
+                      0.95164 times qb, approximated here as 487/512. */
+                  num = (N==2?512:487)*(opus_int32)(max_bits+ndof*offset);
+                  den = ((opus_int32)ndof<<9)-(N==2?512:487);
+                  qb = IMIN((num+(den>>1))/den, (N==2?64:61));
+                  celt_assert(qb >= 0);
+                  max_bits += qb;
+               }
+               /* Add the fine bits we'll use. */
+               /* Compensate for the extra DoF in stereo */
+               ndof = C*N + ((C==2 && N>2) ? 1 : 0);
+               /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET
+                   compared to their "fair share" of total/N */
+               offset = ((m->logN[j] + (i<<BITRES))>>1)-FINE_OFFSET;
+               /* N=2 is the only point that doesn't match the curve */
+               if (N==2)
+                  offset += 1<<BITRES>>2;
+               /* The number of fine bits we'll allocate if the remainder is
+                   to be max_bits. */
+               num = max_bits+ndof*offset;
+               den = (ndof-1)<<BITRES;
+               qb = IMIN((num+(den>>1))/den, MAX_FINE_BITS);
+               celt_assert(qb >= 0);
+               max_bits += C*qb<<BITRES;
+            }
+            max_bits = (4*max_bits/(C*((m->eBands[j+1]-m->eBands[j])<<i)))-64;
+            celt_assert(max_bits >= 0);
+            celt_assert(max_bits < 256);
+            *cap++ = (unsigned char)max_bits;
+         }
+      }
+   }
+}
+
+#endif /* CUSTOM_MODES */
+
+#define ALLOC_STEPS 6
+
+static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
+      const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
+      int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
+      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+   opus_int32 psum;
+   int lo, hi;
+   int i, j;
+   int logM;
+   int stereo;
+   int codedBands=-1;
+   int alloc_floor;
+   opus_int32 left, percoeff;
+   int done;
+   opus_int32 balance;
+   SAVE_STACK;
+
+   alloc_floor = C<<BITRES;
+   stereo = C>1;
+
+   logM = LM<<BITRES;
+   lo = 0;
+   hi = 1<<ALLOC_STEPS;
+   for (i=0;i<ALLOC_STEPS;i++)
+   {
+      int mid = (lo+hi)>>1;
+      psum = 0;
+      done = 0;
+      for (j=end;j-->start;)
+      {
+         int tmp = bits1[j] + (mid*(opus_int32)bits2[j]>>ALLOC_STEPS);
+         if (tmp >= thresh[j] || done)
+         {
+            done = 1;
+            /* Don't allocate more than we can actually use */
+            psum += IMIN(tmp, cap[j]);
+         } else {
+            if (tmp >= alloc_floor)
+               psum += alloc_floor;
+         }
+      }
+      if (psum > total)
+         hi = mid;
+      else
+         lo = mid;
+   }
+   psum = 0;
+   /*printf ("interp bisection gave %d\n", lo);*/
+   done = 0;
+   for (j=end;j-->start;)
+   {
+      int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS);
+      if (tmp < thresh[j] && !done)
+      {
+         if (tmp >= alloc_floor)
+            tmp = alloc_floor;
+         else
+            tmp = 0;
+      } else
+         done = 1;
+      /* Don't allocate more than we can actually use */
+      tmp = IMIN(tmp, cap[j]);
+      bits[j] = tmp;
+      psum += tmp;
+   }
+
+   /* Decide which bands to skip, working backwards from the end. */
+   for (codedBands=end;;codedBands--)
+   {
+      int band_width;
+      int band_bits;
+      int rem;
+      j = codedBands-1;
+      /* Never skip the first band, nor a band that has been boosted by
+          dynalloc.
+         In the first case, we'd be coding a bit to signal we're going to waste
+          all the other bits.
+         In the second case, we'd be coding a bit to redistribute all the bits
+          we just signaled should be cocentrated in this band. */
+      if (j<=skip_start)
+      {
+         /* Give the bit we reserved to end skipping back. */
+         total += skip_rsv;
+         break;
+      }
+      /*Figure out how many left-over bits we would be adding to this band.
+        This can include bits we've stolen back from higher, skipped bands.*/
+      left = total-psum;
+      percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+      left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+      rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
+      band_width = m->eBands[codedBands]-m->eBands[j];
+      band_bits = (int)(bits[j] + percoeff*band_width + rem);
+      /*Only code a skip decision if we're above the threshold for this band.
+        Otherwise it is force-skipped.
+        This ensures that we have enough bits to code the skip flag.*/
+      if (band_bits >= IMAX(thresh[j], alloc_floor+(1<<BITRES)))
+      {
+         if (encode)
+         {
+            /*This if() block is the only part of the allocation function that
+               is not a mandatory part of the bitstream: any bands we choose to
+               skip here must be explicitly signaled.*/
+            /*Choose a threshold with some hysteresis to keep bands from
+               fluctuating in and out.*/
+#ifdef FUZZING
+            if ((rand()&0x1) == 0)
+#else
+            if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
+#endif
+            {
+               ec_enc_bit_logp(ec, 1, 1);
+               break;
+            }
+            ec_enc_bit_logp(ec, 0, 1);
+         } else if (ec_dec_bit_logp(ec, 1)) {
+            break;
+         }
+         /*We used a bit to skip this band.*/
+         psum += 1<<BITRES;
+         band_bits -= 1<<BITRES;
+      }
+      /*Reclaim the bits originally allocated to this band.*/
+      psum -= bits[j]+intensity_rsv;
+      if (intensity_rsv > 0)
+         intensity_rsv = LOG2_FRAC_TABLE[j-start];
+      psum += intensity_rsv;
+      if (band_bits >= alloc_floor)
+      {
+         /*If we have enough for a fine energy bit per channel, use it.*/
+         psum += alloc_floor;
+         bits[j] = alloc_floor;
+      } else {
+         /*Otherwise this band gets nothing at all.*/
+         bits[j] = 0;
+      }
+   }
+
+   celt_assert(codedBands > start);
+   /* Code the intensity and dual stereo parameters. */
+   if (intensity_rsv > 0)
+   {
+      if (encode)
+      {
+         *intensity = IMIN(*intensity, codedBands);
+         ec_enc_uint(ec, *intensity-start, codedBands+1-start);
+      }
+      else
+         *intensity = start+ec_dec_uint(ec, codedBands+1-start);
+   }
+   else
+      *intensity = 0;
+   if (*intensity <= start)
+   {
+      total += dual_stereo_rsv;
+      dual_stereo_rsv = 0;
+   }
+   if (dual_stereo_rsv > 0)
+   {
+      if (encode)
+         ec_enc_bit_logp(ec, *dual_stereo, 1);
+      else
+         *dual_stereo = ec_dec_bit_logp(ec, 1);
+   }
+   else
+      *dual_stereo = 0;
+
+   /* Allocate the remaining bits */
+   left = total-psum;
+   percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+   left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
+   for (j=start;j<codedBands;j++)
+      bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
+   for (j=start;j<codedBands;j++)
+   {
+      int tmp = (int)IMIN(left, m->eBands[j+1]-m->eBands[j]);
+      bits[j] += tmp;
+      left -= tmp;
+   }
+   /*for (j=0;j<end;j++)printf("%d ", bits[j]);printf("\n");*/
+
+   balance = 0;
+   for (j=start;j<codedBands;j++)
+   {
+      int N0, N, den;
+      int offset;
+      int NClogN;
+      opus_int32 excess, bit;
+
+      celt_assert(bits[j] >= 0);
+      N0 = m->eBands[j+1]-m->eBands[j];
+      N=N0<<LM;
+      bit = (opus_int32)bits[j]+balance;
+
+      if (N>1)
+      {
+         excess = MAX32(bit-cap[j],0);
+         bits[j] = bit-excess;
+
+         /* Compensate for the extra DoF in stereo */
+         den=(C*N+ ((C==2 && N>2 && !*dual_stereo && j<*intensity) ? 1 : 0));
+
+         NClogN = den*(m->logN[j] + logM);
+
+         /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET
+            compared to their "fair share" of total/N */
+         offset = (NClogN>>1)-den*FINE_OFFSET;
+
+         /* N=2 is the only point that doesn't match the curve */
+         if (N==2)
+            offset += den<<BITRES>>2;
+
+         /* Changing the offset for allocating the second and third
+             fine energy bit */
+         if (bits[j] + offset < den*2<<BITRES)
+            offset += NClogN>>2;
+         else if (bits[j] + offset < den*3<<BITRES)
+            offset += NClogN>>3;
+
+         /* Divide with rounding */
+         ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES));
+
+         /* Make sure not to bust */
+         if (C*ebits[j] > (bits[j]>>BITRES))
+            ebits[j] = bits[j] >> stereo >> BITRES;
+
+         /* More than that is useless because that's about as far as PVQ can go */
+         ebits[j] = IMIN(ebits[j], MAX_FINE_BITS);
+
+         /* If we rounded down or capped this band, make it a candidate for the
+             final fine energy pass */
+         fine_priority[j] = ebits[j]*(den<<BITRES) >= bits[j]+offset;
+
+         /* Remove the allocated fine bits; the rest are assigned to PVQ */
+         bits[j] -= C*ebits[j]<<BITRES;
+
+      } else {
+         /* For N=1, all bits go to fine energy except for a single sign bit */
+         excess = MAX32(0,bit-(C<<BITRES));
+         bits[j] = bit-excess;
+         ebits[j] = 0;
+         fine_priority[j] = 1;
+      }
+
+      /* Fine energy can't take advantage of the re-balancing in
+          quant_all_bands().
+         Instead, do the re-balancing here.*/
+      if(excess > 0)
+      {
+         int extra_fine;
+         int extra_bits;
+         extra_fine = IMIN(excess>>(stereo+BITRES),MAX_FINE_BITS-ebits[j]);
+         ebits[j] += extra_fine;
+         extra_bits = extra_fine*C<<BITRES;
+         fine_priority[j] = extra_bits >= excess-balance;
+         excess -= extra_bits;
+      }
+      balance = excess;
+
+      celt_assert(bits[j] >= 0);
+      celt_assert(ebits[j] >= 0);
+   }
+   /* Save any remaining bits over the cap for the rebalancing in
+       quant_all_bands(). */
+   *_balance = balance;
+
+   /* The skipped bands use all their bits for fine energy. */
+   for (;j<end;j++)
+   {
+      ebits[j] = bits[j] >> stereo >> BITRES;
+      celt_assert(C*ebits[j]<<BITRES == bits[j]);
+      bits[j] = 0;
+      fine_priority[j] = ebits[j]<1;
+   }
+   RESTORE_STACK;
+   return codedBands;
+}
+
+int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
+{
+   int lo, hi, len, j;
+   int codedBands;
+   int skip_start;
+   int skip_rsv;
+   int intensity_rsv;
+   int dual_stereo_rsv;
+   VARDECL(int, bits1);
+   VARDECL(int, bits2);
+   VARDECL(int, thresh);
+   VARDECL(int, trim_offset);
+   SAVE_STACK;
+
+   total = IMAX(total, 0);
+   len = m->nbEBands;
+   skip_start = start;
+   /* Reserve a bit to signal the end of manually skipped bands. */
+   skip_rsv = total >= 1<<BITRES ? 1<<BITRES : 0;
+   total -= skip_rsv;
+   /* Reserve bits for the intensity and dual stereo parameters. */
+   intensity_rsv = dual_stereo_rsv = 0;
+   if (C==2)
+   {
+      intensity_rsv = LOG2_FRAC_TABLE[end-start];
+      if (intensity_rsv>total)
+         intensity_rsv = 0;
+      else
+      {
+         total -= intensity_rsv;
+         dual_stereo_rsv = total>=1<<BITRES ? 1<<BITRES : 0;
+         total -= dual_stereo_rsv;
+      }
+   }
+   ALLOC(bits1, len, int);
+   ALLOC(bits2, len, int);
+   ALLOC(thresh, len, int);
+   ALLOC(trim_offset, len, int);
+
+   for (j=start;j<end;j++)
+   {
+      /* Below this threshold, we're sure not to allocate any PVQ bits */
+      thresh[j] = IMAX((C)<<BITRES, (3*(m->eBands[j+1]-m->eBands[j])<<LM<<BITRES)>>4);
+      /* Tilt of the allocation curve */
+      trim_offset[j] = C*(m->eBands[j+1]-m->eBands[j])*(alloc_trim-5-LM)*(end-j-1)
+            *(1<<(LM+BITRES))>>6;
+      /* Giving less resolution to single-coefficient bands because they get
+         more benefit from having one coarse value per coefficient*/
+      if ((m->eBands[j+1]-m->eBands[j])<<LM==1)
+         trim_offset[j] -= C<<BITRES;
+   }
+   lo = 1;
+   hi = m->nbAllocVectors - 1;
+   do
+   {
+      int done = 0;
+      int psum = 0;
+      int mid = (lo+hi) >> 1;
+      for (j=end;j-->start;)
+      {
+         int bitsj;
+         int N = m->eBands[j+1]-m->eBands[j];
+         bitsj = C*N*m->allocVectors[mid*len+j]<<LM>>2;
+         if (bitsj > 0)
+            bitsj = IMAX(0, bitsj + trim_offset[j]);
+         bitsj += offsets[j];
+         if (bitsj >= thresh[j] || done)
+         {
+            done = 1;
+            /* Don't allocate more than we can actually use */
+            psum += IMIN(bitsj, cap[j]);
+         } else {
+            if (bitsj >= C<<BITRES)
+               psum += C<<BITRES;
+         }
+      }
+      if (psum > total)
+         hi = mid - 1;
+      else
+         lo = mid + 1;
+      /*printf ("lo = %d, hi = %d\n", lo, hi);*/
+   }
+   while (lo <= hi);
+   hi = lo--;
+   /*printf ("interp between %d and %d\n", lo, hi);*/
+   for (j=start;j<end;j++)
+   {
+      int bits1j, bits2j;
+      int N = m->eBands[j+1]-m->eBands[j];
+      bits1j = C*N*m->allocVectors[lo*len+j]<<LM>>2;
+      bits2j = hi>=m->nbAllocVectors ?
+            cap[j] : C*N*m->allocVectors[hi*len+j]<<LM>>2;
+      if (bits1j > 0)
+         bits1j = IMAX(0, bits1j + trim_offset[j]);
+      if (bits2j > 0)
+         bits2j = IMAX(0, bits2j + trim_offset[j]);
+      if (lo > 0)
+         bits1j += offsets[j];
+      bits2j += offsets[j];
+      if (offsets[j]>0)
+         skip_start = j;
+      bits2j = IMAX(0,bits2j-bits1j);
+      bits1[j] = bits1j;
+      bits2[j] = bits2j;
+   }
+   codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap,
+         total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv,
+         pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
+   RESTORE_STACK;
+   return codedBands;
+}
+
diff --git a/drivers/opus/celt/rate.h b/drivers/opus/celt/rate.h
new file mode 100644
index 0000000000..7ced23ea09
--- /dev/null
+++ b/drivers/opus/celt/rate.h
@@ -0,0 +1,101 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef RATE_H
+#define RATE_H
+
+#define MAX_PSEUDO 40
+#define LOG_MAX_PSEUDO 6
+
+#define MAX_PULSES 128
+
+#define MAX_FINE_BITS 8
+
+#define FINE_OFFSET 21
+#define QTHETA_OFFSET 4
+#define QTHETA_OFFSET_TWOPHASE 16
+
+#include "cwrs.h"
+#include "opus_modes.h"
+
+void compute_pulse_cache(CELTMode *m, int LM);
+
+static OPUS_INLINE int get_pulses(int i)
+{
+   return i<8 ? i : (8 + (i&7)) << ((i>>3)-1);
+}
+
+static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits)
+{
+   int i;
+   int lo, hi;
+   const unsigned char *cache;
+
+   LM++;
+   cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
+
+   lo = 0;
+   hi = cache[0];
+   bits--;
+   for (i=0;i<LOG_MAX_PSEUDO;i++)
+   {
+      int mid = (lo+hi+1)>>1;
+      /* OPT: Make sure this is implemented with a conditional move */
+      if ((int)cache[mid] >= bits)
+         hi = mid;
+      else
+         lo = mid;
+   }
+   if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits)
+      return lo;
+   else
+      return hi;
+}
+
+static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
+{
+   const unsigned char *cache;
+
+   LM++;
+   cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band];
+   return pulses == 0 ? 0 : cache[pulses]+1;
+}
+
+/** Compute the pulse allocation, i.e. how many pulses will go in each
+  * band.
+ @param m mode
+ @param offsets Requested increase or decrease in the number of bits for
+                each band
+ @param total Number of bands
+ @param pulses Number of pulses per band (returned)
+ @return Total number of bits allocated
+*/
+int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero,
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
+
+#endif
diff --git a/drivers/opus/celt/stack_alloc.h b/drivers/opus/celt/stack_alloc.h
new file mode 100644
index 0000000000..d500c4dab9
--- /dev/null
+++ b/drivers/opus/celt/stack_alloc.h
@@ -0,0 +1,182 @@
+/* Copyright (C) 2002-2003 Jean-Marc Valin
+   Copyright (C) 2007-2009 Xiph.Org Foundation */
+/**
+   @file stack_alloc.h
+   @brief Temporary memory allocation on stack
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef STACK_ALLOC_H
+#define STACK_ALLOC_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK))
+#define VAR_ARRAYS
+#endif
+
+#ifdef USE_ALLOCA
+# ifdef WIN32
+#  include <malloc.h>
+# else
+#  ifdef OPUS_HAVE_ALLOCA_H
+#   include <alloca.h>
+#  else
+#   ifdef __linux__
+#    include <alloca.h>
+#   else
+#    include <stdlib.h>
+#   endif
+#  endif
+# endif
+#endif
+
+/**
+ * @def ALIGN(stack, size)
+ *
+ * Aligns the stack to a 'size' boundary
+ *
+ * @param stack Stack
+ * @param size  New size boundary
+ */
+
+/**
+ * @def PUSH(stack, size, type)
+ *
+ * Allocates 'size' elements of type 'type' on the stack
+ *
+ * @param stack Stack
+ * @param size  Number of elements
+ * @param type  Type of element
+ */
+
+/**
+ * @def VARDECL(var)
+ *
+ * Declare variable on stack
+ *
+ * @param var Variable to declare
+ */
+
+/**
+ * @def ALLOC(var, size, type)
+ *
+ * Allocate 'size' elements of 'type' on stack
+ *
+ * @param var  Name of variable to allocate
+ * @param size Number of elements
+ * @param type Type of element
+ */
+
+#if defined(VAR_ARRAYS)
+
+#define VARDECL(type, var)
+#define ALLOC(var, size, type) type var[size]
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+/* C99 does not allow VLAs of size zero */
+#define ALLOC_NONE 1
+
+#elif defined(USE_ALLOCA)
+
+#define VARDECL(type, var) type *var
+
+# ifdef WIN32
+#  define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size)))
+# else
+#  define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
+# endif
+
+#define SAVE_STACK
+#define RESTORE_STACK
+#define ALLOC_STACK
+#define ALLOC_NONE 0
+
+#else
+
+#ifdef CELT_C
+char *global_stack=0;
+#else
+extern char *global_stack;
+#endif /* CELT_C */
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+
+#ifdef CELT_C
+char *global_stack_top=0;
+#else
+extern char *global_stack_top;
+#endif /* CELT_C */
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (VALGRIND_MAKE_MEM_NOACCESS(stack, global_stack_top-stack),ALIGN((stack),sizeof(type)/sizeof(char)),VALGRIND_MAKE_MEM_UNDEFINED(stack, ((size)*sizeof(type)/sizeof(char))),(stack)+=(2*(size)*sizeof(type)/sizeof(char)),(type*)((stack)-(2*(size)*sizeof(type)/sizeof(char))))
+#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack))
+#define ALLOC_STACK char *_saved_stack; ((global_stack = (global_stack==0) ? ((global_stack_top=opus_alloc_scratch(GLOBAL_STACK_SIZE*2)+(GLOBAL_STACK_SIZE*2))-(GLOBAL_STACK_SIZE*2)) : global_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)); _saved_stack = global_stack;
+
+#else
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
+#define RESTORE_STACK (global_stack = _saved_stack)
+#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
+
+#endif /* ENABLE_VALGRIND */
+
+#include "os_support.h"
+#define VARDECL(type, var) type *var
+#define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
+#define SAVE_STACK char *_saved_stack = global_stack;
+#define ALLOC_NONE 0
+
+#endif /* VAR_ARRAYS */
+
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0)
+#define OPUS_FPRINTF fprintf
+
+#else
+
+static OPUS_INLINE int _opus_false(void) {return 0;}
+#define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
+#define OPUS_CHECK_VALUE(value) _opus_false()
+#define OPUS_PRINT_INT(value) do{}while(0)
+#define OPUS_FPRINTF (void)
+
+#endif
+
+
+#endif /* STACK_ALLOC_H */
diff --git a/drivers/opus/celt/static_modes_fixed.h b/drivers/opus/celt/static_modes_fixed.h
new file mode 100644
index 0000000000..d23e2a66f5
--- /dev/null
+++ b/drivers/opus/celt/static_modes_fixed.h
@@ -0,0 +1,595 @@
+/* The contents of this file was automatically generated by dump_modes.c
+   with arguments: 48000 960
+   It contains static definitions for some pre-defined modes. */
+#include "opus_modes.h"
+#include "rate.h"
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+2, 20, 55, 108, 178,
+266, 372, 494, 635, 792,
+966, 1157, 1365, 1590, 1831,
+2089, 2362, 2651, 2956, 3276,
+3611, 3961, 4325, 4703, 5094,
+5499, 5916, 6346, 6788, 7241,
+7705, 8179, 8663, 9156, 9657,
+10167, 10684, 11207, 11736, 12271,
+12810, 13353, 13899, 14447, 14997,
+15547, 16098, 16648, 17197, 17744,
+18287, 18827, 19363, 19893, 20418,
+20936, 21447, 21950, 22445, 22931,
+23407, 23874, 24330, 24774, 25208,
+25629, 26039, 26435, 26819, 27190,
+27548, 27893, 28224, 28541, 28845,
+29135, 29411, 29674, 29924, 30160,
+30384, 30594, 30792, 30977, 31151,
+31313, 31463, 31602, 31731, 31849,
+31958, 32057, 32148, 32229, 32303,
+32370, 32429, 32481, 32528, 32568,
+32604, 32634, 32661, 32683, 32701,
+32717, 32729, 32740, 32748, 32754,
+32758, 32762, 32764, 32766, 32767,
+32767, 32767, 32767, 32767, 32767,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{32767, 0}, {32766, -429},
+{32757, -858}, {32743, -1287},
+{32724, -1715}, {32698, -2143},
+{32667, -2570}, {32631, -2998},
+{32588, -3425}, {32541, -3851},
+{32488, -4277}, {32429, -4701},
+{32364, -5125}, {32295, -5548},
+{32219, -5971}, {32138, -6393},
+{32051, -6813}, {31960, -7231},
+{31863, -7650}, {31760, -8067},
+{31652, -8481}, {31539, -8895},
+{31419, -9306}, {31294, -9716},
+{31165, -10126}, {31030, -10532},
+{30889, -10937}, {30743, -11340},
+{30592, -11741}, {30436, -12141},
+{30274, -12540}, {30107, -12935},
+{29936, -13328}, {29758, -13718},
+{29577, -14107}, {29390, -14493},
+{29197, -14875}, {29000, -15257},
+{28797, -15635}, {28590, -16010},
+{28379, -16384}, {28162, -16753},
+{27940, -17119}, {27714, -17484},
+{27482, -17845}, {27246, -18205},
+{27006, -18560}, {26760, -18911},
+{26510, -19260}, {26257, -19606},
+{25997, -19947}, {25734, -20286},
+{25466, -20621}, {25194, -20952},
+{24918, -21281}, {24637, -21605},
+{24353, -21926}, {24063, -22242},
+{23770, -22555}, {23473, -22865},
+{23171, -23171}, {22866, -23472},
+{22557, -23769}, {22244, -24063},
+{21927, -24352}, {21606, -24636},
+{21282, -24917}, {20954, -25194},
+{20622, -25465}, {20288, -25733},
+{19949, -25997}, {19607, -26255},
+{19261, -26509}, {18914, -26760},
+{18561, -27004}, {18205, -27246},
+{17846, -27481}, {17485, -27713},
+{17122, -27940}, {16755, -28162},
+{16385, -28378}, {16012, -28590},
+{15636, -28797}, {15258, -28999},
+{14878, -29197}, {14494, -29389},
+{14108, -29576}, {13720, -29757},
+{13329, -29934}, {12937, -30107},
+{12540, -30274}, {12142, -30435},
+{11744, -30592}, {11342, -30743},
+{10939, -30889}, {10534, -31030},
+{10127, -31164}, {9718, -31294},
+{9307, -31418}, {8895, -31537},
+{8482, -31652}, {8067, -31759},
+{7650, -31862}, {7233, -31960},
+{6815, -32051}, {6393, -32138},
+{5973, -32219}, {5549, -32294},
+{5127, -32364}, {4703, -32429},
+{4278, -32487}, {3852, -32541},
+{3426, -32588}, {2999, -32630},
+{2572, -32667}, {2144, -32698},
+{1716, -32724}, {1287, -32742},
+{860, -32757}, {430, -32766},
+{0, -32767}, {-429, -32766},
+{-858, -32757}, {-1287, -32743},
+{-1715, -32724}, {-2143, -32698},
+{-2570, -32667}, {-2998, -32631},
+{-3425, -32588}, {-3851, -32541},
+{-4277, -32488}, {-4701, -32429},
+{-5125, -32364}, {-5548, -32295},
+{-5971, -32219}, {-6393, -32138},
+{-6813, -32051}, {-7231, -31960},
+{-7650, -31863}, {-8067, -31760},
+{-8481, -31652}, {-8895, -31539},
+{-9306, -31419}, {-9716, -31294},
+{-10126, -31165}, {-10532, -31030},
+{-10937, -30889}, {-11340, -30743},
+{-11741, -30592}, {-12141, -30436},
+{-12540, -30274}, {-12935, -30107},
+{-13328, -29936}, {-13718, -29758},
+{-14107, -29577}, {-14493, -29390},
+{-14875, -29197}, {-15257, -29000},
+{-15635, -28797}, {-16010, -28590},
+{-16384, -28379}, {-16753, -28162},
+{-17119, -27940}, {-17484, -27714},
+{-17845, -27482}, {-18205, -27246},
+{-18560, -27006}, {-18911, -26760},
+{-19260, -26510}, {-19606, -26257},
+{-19947, -25997}, {-20286, -25734},
+{-20621, -25466}, {-20952, -25194},
+{-21281, -24918}, {-21605, -24637},
+{-21926, -24353}, {-22242, -24063},
+{-22555, -23770}, {-22865, -23473},
+{-23171, -23171}, {-23472, -22866},
+{-23769, -22557}, {-24063, -22244},
+{-24352, -21927}, {-24636, -21606},
+{-24917, -21282}, {-25194, -20954},
+{-25465, -20622}, {-25733, -20288},
+{-25997, -19949}, {-26255, -19607},
+{-26509, -19261}, {-26760, -18914},
+{-27004, -18561}, {-27246, -18205},
+{-27481, -17846}, {-27713, -17485},
+{-27940, -17122}, {-28162, -16755},
+{-28378, -16385}, {-28590, -16012},
+{-28797, -15636}, {-28999, -15258},
+{-29197, -14878}, {-29389, -14494},
+{-29576, -14108}, {-29757, -13720},
+{-29934, -13329}, {-30107, -12937},
+{-30274, -12540}, {-30435, -12142},
+{-30592, -11744}, {-30743, -11342},
+{-30889, -10939}, {-31030, -10534},
+{-31164, -10127}, {-31294, -9718},
+{-31418, -9307}, {-31537, -8895},
+{-31652, -8482}, {-31759, -8067},
+{-31862, -7650}, {-31960, -7233},
+{-32051, -6815}, {-32138, -6393},
+{-32219, -5973}, {-32294, -5549},
+{-32364, -5127}, {-32429, -4703},
+{-32487, -4278}, {-32541, -3852},
+{-32588, -3426}, {-32630, -2999},
+{-32667, -2572}, {-32698, -2144},
+{-32724, -1716}, {-32742, -1287},
+{-32757, -860}, {-32766, -430},
+{-32767, 0}, {-32766, 429},
+{-32757, 858}, {-32743, 1287},
+{-32724, 1715}, {-32698, 2143},
+{-32667, 2570}, {-32631, 2998},
+{-32588, 3425}, {-32541, 3851},
+{-32488, 4277}, {-32429, 4701},
+{-32364, 5125}, {-32295, 5548},
+{-32219, 5971}, {-32138, 6393},
+{-32051, 6813}, {-31960, 7231},
+{-31863, 7650}, {-31760, 8067},
+{-31652, 8481}, {-31539, 8895},
+{-31419, 9306}, {-31294, 9716},
+{-31165, 10126}, {-31030, 10532},
+{-30889, 10937}, {-30743, 11340},
+{-30592, 11741}, {-30436, 12141},
+{-30274, 12540}, {-30107, 12935},
+{-29936, 13328}, {-29758, 13718},
+{-29577, 14107}, {-29390, 14493},
+{-29197, 14875}, {-29000, 15257},
+{-28797, 15635}, {-28590, 16010},
+{-28379, 16384}, {-28162, 16753},
+{-27940, 17119}, {-27714, 17484},
+{-27482, 17845}, {-27246, 18205},
+{-27006, 18560}, {-26760, 18911},
+{-26510, 19260}, {-26257, 19606},
+{-25997, 19947}, {-25734, 20286},
+{-25466, 20621}, {-25194, 20952},
+{-24918, 21281}, {-24637, 21605},
+{-24353, 21926}, {-24063, 22242},
+{-23770, 22555}, {-23473, 22865},
+{-23171, 23171}, {-22866, 23472},
+{-22557, 23769}, {-22244, 24063},
+{-21927, 24352}, {-21606, 24636},
+{-21282, 24917}, {-20954, 25194},
+{-20622, 25465}, {-20288, 25733},
+{-19949, 25997}, {-19607, 26255},
+{-19261, 26509}, {-18914, 26760},
+{-18561, 27004}, {-18205, 27246},
+{-17846, 27481}, {-17485, 27713},
+{-17122, 27940}, {-16755, 28162},
+{-16385, 28378}, {-16012, 28590},
+{-15636, 28797}, {-15258, 28999},
+{-14878, 29197}, {-14494, 29389},
+{-14108, 29576}, {-13720, 29757},
+{-13329, 29934}, {-12937, 30107},
+{-12540, 30274}, {-12142, 30435},
+{-11744, 30592}, {-11342, 30743},
+{-10939, 30889}, {-10534, 31030},
+{-10127, 31164}, {-9718, 31294},
+{-9307, 31418}, {-8895, 31537},
+{-8482, 31652}, {-8067, 31759},
+{-7650, 31862}, {-7233, 31960},
+{-6815, 32051}, {-6393, 32138},
+{-5973, 32219}, {-5549, 32294},
+{-5127, 32364}, {-4703, 32429},
+{-4278, 32487}, {-3852, 32541},
+{-3426, 32588}, {-2999, 32630},
+{-2572, 32667}, {-2144, 32698},
+{-1716, 32724}, {-1287, 32742},
+{-860, 32757}, {-430, 32766},
+{0, 32767}, {429, 32766},
+{858, 32757}, {1287, 32743},
+{1715, 32724}, {2143, 32698},
+{2570, 32667}, {2998, 32631},
+{3425, 32588}, {3851, 32541},
+{4277, 32488}, {4701, 32429},
+{5125, 32364}, {5548, 32295},
+{5971, 32219}, {6393, 32138},
+{6813, 32051}, {7231, 31960},
+{7650, 31863}, {8067, 31760},
+{8481, 31652}, {8895, 31539},
+{9306, 31419}, {9716, 31294},
+{10126, 31165}, {10532, 31030},
+{10937, 30889}, {11340, 30743},
+{11741, 30592}, {12141, 30436},
+{12540, 30274}, {12935, 30107},
+{13328, 29936}, {13718, 29758},
+{14107, 29577}, {14493, 29390},
+{14875, 29197}, {15257, 29000},
+{15635, 28797}, {16010, 28590},
+{16384, 28379}, {16753, 28162},
+{17119, 27940}, {17484, 27714},
+{17845, 27482}, {18205, 27246},
+{18560, 27006}, {18911, 26760},
+{19260, 26510}, {19606, 26257},
+{19947, 25997}, {20286, 25734},
+{20621, 25466}, {20952, 25194},
+{21281, 24918}, {21605, 24637},
+{21926, 24353}, {22242, 24063},
+{22555, 23770}, {22865, 23473},
+{23171, 23171}, {23472, 22866},
+{23769, 22557}, {24063, 22244},
+{24352, 21927}, {24636, 21606},
+{24917, 21282}, {25194, 20954},
+{25465, 20622}, {25733, 20288},
+{25997, 19949}, {26255, 19607},
+{26509, 19261}, {26760, 18914},
+{27004, 18561}, {27246, 18205},
+{27481, 17846}, {27713, 17485},
+{27940, 17122}, {28162, 16755},
+{28378, 16385}, {28590, 16012},
+{28797, 15636}, {28999, 15258},
+{29197, 14878}, {29389, 14494},
+{29576, 14108}, {29757, 13720},
+{29934, 13329}, {30107, 12937},
+{30274, 12540}, {30435, 12142},
+{30592, 11744}, {30743, 11342},
+{30889, 10939}, {31030, 10534},
+{31164, 10127}, {31294, 9718},
+{31418, 9307}, {31537, 8895},
+{31652, 8482}, {31759, 8067},
+{31862, 7650}, {31960, 7233},
+{32051, 6815}, {32138, 6393},
+{32219, 5973}, {32294, 5549},
+{32364, 5127}, {32429, 4703},
+{32487, 4278}, {32541, 3852},
+{32588, 3426}, {32630, 2999},
+{32667, 2572}, {32698, 2144},
+{32724, 1716}, {32742, 1287},
+{32757, 860}, {32766, 430},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
+450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
+345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
+215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
+110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
+430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
+325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
+181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
+76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
+396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
+291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
+161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
+56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
+362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
+257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
+127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
+22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
+472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
+342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
+237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
+93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
+438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
+308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
+203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
+73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
+418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
+274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
+169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
+39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
+384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
+254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
+149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
+225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
+170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
+115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
+46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
+216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
+161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
+92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
+37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
+207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
+138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
+83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
+28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
+184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
+129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
+74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
+110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
+76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
+56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
+22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
+93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
+73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
+39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
+46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
+37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
+28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = {
+480,    /* nfft */
+-1,     /* shift */
+{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev480,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = {
+240,    /* nfft */
+1,      /* shift */
+{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev240,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = {
+120,    /* nfft */
+2,      /* shift */
+{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev120,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = {
+60,     /* nfft */
+3,      /* shift */
+{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev60,   /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[481] = {
+32767, 32767, 32767, 32767, 32766,
+32763, 32762, 32759, 32757, 32753,
+32751, 32747, 32743, 32738, 32733,
+32729, 32724, 32717, 32711, 32705,
+32698, 32690, 32683, 32676, 32667,
+32658, 32650, 32640, 32631, 32620,
+32610, 32599, 32588, 32577, 32566,
+32554, 32541, 32528, 32515, 32502,
+32487, 32474, 32459, 32444, 32429,
+32413, 32397, 32381, 32364, 32348,
+32331, 32313, 32294, 32277, 32257,
+32239, 32219, 32200, 32180, 32159,
+32138, 32118, 32096, 32074, 32051,
+32029, 32006, 31984, 31960, 31936,
+31912, 31888, 31863, 31837, 31812,
+31786, 31760, 31734, 31707, 31679,
+31652, 31624, 31596, 31567, 31539,
+31508, 31479, 31450, 31419, 31388,
+31357, 31326, 31294, 31262, 31230,
+31198, 31164, 31131, 31097, 31063,
+31030, 30994, 30959, 30924, 30889,
+30853, 30816, 30779, 30743, 30705,
+30668, 30629, 30592, 30553, 30515,
+30475, 30435, 30396, 30356, 30315,
+30274, 30233, 30191, 30149, 30107,
+30065, 30022, 29979, 29936, 29891,
+29847, 29803, 29758, 29713, 29668,
+29622, 29577, 29529, 29483, 29436,
+29390, 29341, 29293, 29246, 29197,
+29148, 29098, 29050, 29000, 28949,
+28899, 28848, 28797, 28746, 28694,
+28642, 28590, 28537, 28485, 28432,
+28378, 28324, 28271, 28217, 28162,
+28106, 28051, 27995, 27940, 27884,
+27827, 27770, 27713, 27657, 27598,
+27540, 27481, 27423, 27365, 27305,
+27246, 27187, 27126, 27066, 27006,
+26945, 26883, 26822, 26760, 26698,
+26636, 26574, 26510, 26448, 26383,
+26320, 26257, 26191, 26127, 26062,
+25997, 25931, 25866, 25800, 25734,
+25667, 25601, 25533, 25466, 25398,
+25330, 25262, 25194, 25125, 25056,
+24987, 24917, 24848, 24778, 24707,
+24636, 24566, 24495, 24424, 24352,
+24280, 24208, 24135, 24063, 23990,
+23917, 23842, 23769, 23695, 23622,
+23546, 23472, 23398, 23322, 23246,
+23171, 23095, 23018, 22942, 22866,
+22788, 22711, 22634, 22557, 22478,
+22400, 22322, 22244, 22165, 22085,
+22006, 21927, 21846, 21766, 21687,
+21606, 21524, 21443, 21363, 21282,
+21199, 21118, 21035, 20954, 20870,
+20788, 20705, 20621, 20538, 20455,
+20371, 20286, 20202, 20118, 20034,
+19947, 19863, 19777, 19692, 19606,
+19520, 19434, 19347, 19260, 19174,
+19088, 18999, 18911, 18825, 18737,
+18648, 18560, 18472, 18384, 18294,
+18205, 18116, 18025, 17936, 17846,
+17757, 17666, 17576, 17485, 17395,
+17303, 17212, 17122, 17030, 16937,
+16846, 16755, 16662, 16569, 16477,
+16385, 16291, 16198, 16105, 16012,
+15917, 15824, 15730, 15636, 15541,
+15447, 15352, 15257, 15162, 15067,
+14973, 14875, 14781, 14685, 14589,
+14493, 14396, 14300, 14204, 14107,
+14010, 13914, 13815, 13718, 13621,
+13524, 13425, 13328, 13230, 13133,
+13033, 12935, 12836, 12738, 12638,
+12540, 12441, 12341, 12241, 12142,
+12044, 11943, 11843, 11744, 11643,
+11542, 11442, 11342, 11241, 11139,
+11039, 10939, 10836, 10736, 10635,
+10534, 10431, 10330, 10228, 10127,
+10024, 9921, 9820, 9718, 9614,
+9512, 9410, 9306, 9204, 9101,
+8998, 8895, 8791, 8689, 8585,
+8481, 8377, 8274, 8171, 8067,
+7962, 7858, 7753, 7650, 7545,
+7441, 7336, 7231, 7129, 7023,
+6917, 6813, 6709, 6604, 6498,
+6393, 6288, 6182, 6077, 5973,
+5867, 5760, 5656, 5549, 5445,
+5339, 5232, 5127, 5022, 4914,
+4809, 4703, 4596, 4490, 4384,
+4278, 4171, 4065, 3958, 3852,
+3745, 3640, 3532, 3426, 3318,
+3212, 3106, 2998, 2891, 2786,
+2679, 2570, 2465, 2358, 2251,
+2143, 2037, 1929, 1823, 1715,
+1609, 1501, 1393, 1287, 1180,
+1073, 964, 858, 751, 644,
+535, 429, 322, 214, 107,
+0, };
+#endif
+
+static const CELTMode mode48000_960_120 = {
+48000,  /* Fs */
+120,    /* overlap */
+21,     /* nbEBands */
+21,     /* effEBands */
+{27853, 0, 4096, 8192, },       /* preemph */
+eband5ms,       /* eBands */
+3,      /* maxLM */
+8,      /* nbShortMdcts */
+120,    /* shortMdctSize */
+11,     /* nbAllocVectors */
+band_allocation,        /* allocVectors */
+logN400,        /* logN */
+window120,      /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960},    /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50},       /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1
+static const CELTMode * const static_mode_list[TOTAL_MODES] = {
+&mode48000_960_120,
+};
diff --git a/drivers/opus/celt/static_modes_float.h b/drivers/opus/celt/static_modes_float.h
new file mode 100644
index 0000000000..fe6bb4c8a3
--- /dev/null
+++ b/drivers/opus/celt/static_modes_float.h
@@ -0,0 +1,599 @@
+/* The contents of this file was automatically generated by dump_modes.c
+   with arguments: 48000 960
+   It contains static definitions for some pre-defined modes. */
+#include "opus_modes.h"
+#include "rate.h"
+
+#ifndef DEF_WINDOW120
+#define DEF_WINDOW120
+static const opus_val16 window120[120] = {
+6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
+0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
+0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
+0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f,
+0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f,
+0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f,
+0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f,
+0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f,
+0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f,
+0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f,
+0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f,
+0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f,
+0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f,
+0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f,
+0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f,
+0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f,
+0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f,
+0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f,
+0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f,
+0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f,
+0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
+0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
+0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
+0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
+};
+#endif
+
+#ifndef DEF_LOGN400
+#define DEF_LOGN400
+static const opus_int16 logN400[21] = {
+0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
+#endif
+
+#ifndef DEF_PULSE_CACHE50
+#define DEF_PULSE_CACHE50
+static const opus_int16 cache_index50[105] = {
+-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
+82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
+41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
+41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
+318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
+305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
+240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
+};
+static const unsigned char cache_bits50[392] = {
+40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
+31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
+51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
+66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
+64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
+94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
+124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
+97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
+142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
+28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
+153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
+229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
+166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
+86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
+25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
+185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
+110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
+74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
+163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
+228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
+90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
+87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
+106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
+224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
+182, 234, };
+static const unsigned char cache_caps50[168] = {
+224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
+178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
+240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
+160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
+138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
+204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
+185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
+207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
+188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
+193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
+204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
+140, 66, 40, };
+#endif
+
+#ifndef FFT_TWIDDLES48000_960
+#define FFT_TWIDDLES48000_960
+static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
+{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f},
+{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f},
+{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f},
+{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f},
+{0.99452190f, -0.10452846f}, {0.99306846f, -0.11753740f},
+{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f},
+{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f},
+{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f},
+{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f},
+{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f},
+{0.96592583f, -0.25881905f}, {0.96245524f, -0.27144045f},
+{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f},
+{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f},
+{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f},
+{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f},
+{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f},
+{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f},
+{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f},
+{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f},
+{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f},
+{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f},
+{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f},
+{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f},
+{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f},
+{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f},
+{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f},
+{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f},
+{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f},
+{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f},
+{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f},
+{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f},
+{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f},
+{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f},
+{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f},
+{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f},
+{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f},
+{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f},
+{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f},
+{0.54463904f, -0.83867057f}, {0.53361452f, -0.84572782f},
+{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f},
+{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f},
+{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f},
+{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f},
+{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f},
+{0.40673664f, -0.91354546f}, {0.39474386f, -0.91879121f},
+{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f},
+{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f},
+{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f},
+{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f},
+{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f},
+{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f},
+{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f},
+{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f},
+{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f},
+{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f},
+{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f},
+{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f},
+{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f},
+{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f},
+{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f},
+{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f},
+{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f},
+{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f},
+{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f},
+{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f},
+{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f},
+{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f},
+{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f},
+{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f},
+{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f},
+{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f},
+{-0.28401534f, -0.95881973f}, {-0.29654157f, -0.95501994f},
+{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f},
+{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f},
+{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f},
+{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f},
+{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f},
+{-0.43051110f, -0.90258528f}, {-0.44228869f, -0.89687274f},
+{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f},
+{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f},
+{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f},
+{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f},
+{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f},
+{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f},
+{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f},
+{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f},
+{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f},
+{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f},
+{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f},
+{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f},
+{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f},
+{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f},
+{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f},
+{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f},
+{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f},
+{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f},
+{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f},
+{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f},
+{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f},
+{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f},
+{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f},
+{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f},
+{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f},
+{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f},
+{-0.91354546f, -0.40673664f}, {-0.91879121f, -0.39474386f},
+{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f},
+{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f},
+{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f},
+{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f},
+{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f},
+{-0.96592583f, -0.25881905f}, {-0.96923091f, -0.24615329f},
+{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f},
+{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f},
+{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f},
+{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f},
+{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f},
+{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f},
+{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f},
+{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f},
+{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f},
+{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f},
+{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f},
+{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f},
+{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f},
+{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f},
+{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f},
+{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f},
+{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f},
+{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f},
+{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f},
+{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f},
+{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f},
+{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f},
+{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f},
+{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f},
+{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f},
+{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f},
+{-0.90258528f, 0.43051110f}, {-0.89687274f, 0.44228869f},
+{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f},
+{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f},
+{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f},
+{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f},
+{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f},
+{-0.82412619f, 0.56640624f}, {-0.81664156f, 0.57714519f},
+{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f},
+{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f},
+{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f},
+{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f},
+{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f},
+{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f},
+{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f},
+{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f},
+{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f},
+{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f},
+{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f},
+{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f},
+{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f},
+{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f},
+{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f},
+{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f},
+{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f},
+{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f},
+{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f},
+{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f},
+{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f},
+{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f},
+{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f},
+{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f},
+{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f},
+{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f},
+{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f},
+{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f},
+{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f},
+{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f},
+{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f},
+{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f},
+{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f},
+{-0.078459096f, 0.99691733f}, {-0.065403129f, 0.99785892f},
+{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f},
+{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f},
+{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f},
+{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f},
+{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f},
+{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f},
+{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f},
+{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f},
+{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f},
+{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f},
+{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f},
+{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f},
+{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f},
+{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f},
+{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f},
+{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f},
+{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f},
+{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f},
+{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f},
+{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f},
+{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f},
+{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f},
+{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f},
+{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f},
+{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f},
+{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f},
+{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f},
+{0.60876143f, 0.79335334f}, {0.61909395f, 0.78531693f},
+{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f},
+{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f},
+{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f},
+{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f},
+{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f},
+{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f},
+{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f},
+{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f},
+{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f},
+{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f},
+{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f},
+{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f},
+{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f},
+{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f},
+{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f},
+{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f},
+{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f},
+{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f},
+{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f},
+{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f},
+{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f},
+{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f},
+{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f},
+{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f},
+{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f},
+{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f},
+{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f},
+{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f},
+{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f},
+{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f},
+{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f},
+{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f},
+{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f},
+{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f},
+};
+#ifndef FFT_BITREV480
+#define FFT_BITREV480
+static const opus_int16 fft_bitrev480[480] = {
+0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
+450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
+345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
+215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
+110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
+430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
+325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
+181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
+76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
+396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
+291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
+161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
+56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
+362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
+257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
+127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
+22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
+472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
+342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
+237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
+93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
+438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
+308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
+203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
+73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
+418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
+274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
+169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
+39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
+384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
+254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
+149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+};
+#endif
+
+#ifndef FFT_BITREV240
+#define FFT_BITREV240
+static const opus_int16 fft_bitrev240[240] = {
+0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
+225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
+170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
+115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
+46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
+216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
+161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
+92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
+37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
+207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
+138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
+83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
+28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
+184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
+129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
+74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+};
+#endif
+
+#ifndef FFT_BITREV120
+#define FFT_BITREV120
+static const opus_int16 fft_bitrev120[120] = {
+0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
+110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
+76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
+56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
+22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
+93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
+73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
+39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+};
+#endif
+
+#ifndef FFT_BITREV60
+#define FFT_BITREV60
+static const opus_int16 fft_bitrev60[60] = {
+0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
+46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
+37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
+28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+};
+#endif
+
+#ifndef FFT_STATE48000_960_0
+#define FFT_STATE48000_960_0
+static const kiss_fft_state fft_state48000_960_0 = {
+480,    /* nfft */
+0.002083333f,   /* scale */
+-1,     /* shift */
+{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+fft_bitrev480,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_1
+#define FFT_STATE48000_960_1
+static const kiss_fft_state fft_state48000_960_1 = {
+240,    /* nfft */
+0.004166667f,   /* scale */
+1,      /* shift */
+{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev240,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_2
+#define FFT_STATE48000_960_2
+static const kiss_fft_state fft_state48000_960_2 = {
+120,    /* nfft */
+0.008333333f,   /* scale */
+2,      /* shift */
+{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, },   /* factors */
+fft_bitrev120,  /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#ifndef FFT_STATE48000_960_3
+#define FFT_STATE48000_960_3
+static const kiss_fft_state fft_state48000_960_3 = {
+60,     /* nfft */
+0.016666667f,   /* scale */
+3,      /* shift */
+{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
+fft_bitrev60,   /* bitrev */
+fft_twiddles48000_960,  /* bitrev */
+};
+#endif
+
+#endif
+
+#ifndef MDCT_TWIDDLES960
+#define MDCT_TWIDDLES960
+static const opus_val16 mdct_twiddles960[481] = {
+1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f,
+0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f,
+0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f,
+0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f,
+0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f,
+0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f,
+0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f,
+0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f,
+0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f,
+0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f,
+0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f,
+0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f,
+0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f,
+0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f,
+0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f,
+0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f,
+0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f,
+0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f,
+0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f,
+0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f,
+0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f,
+0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f,
+0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f,
+0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f,
+0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f,
+0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f,
+0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f,
+0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f,
+0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f,
+0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f,
+0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f,
+0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f,
+0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f,
+0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f,
+0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f,
+0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f,
+0.83146961f, 0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f,
+0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f,
+0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f,
+0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f,
+0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f,
+0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f,
+0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f,
+0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f,
+0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f,
+0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f,
+0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f,
+0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f,
+0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f,
+0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f,
+0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f,
+0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f,
+0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f,
+0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f,
+0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f,
+0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f,
+0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f,
+0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f,
+0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f,
+0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f,
+0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f,
+0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f,
+0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f,
+0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f,
+0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f,
+0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f,
+0.47139677f, 0.46850813f, 0.46561448f, 0.46271584f, 0.45981235f,
+0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f,
+0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f,
+0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f,
+0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f,
+0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f,
+0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f,
+0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f,
+0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f,
+0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f,
+0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f,
+0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f,
+0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f,
+0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f,
+0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f,
+0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f,
+0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f,
+0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f,
+0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f,
+0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f,
+0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f,
+0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f,
+0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f,
+0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f,
+0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f,
+0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f,
+0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f,
+0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f,
+0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f,
+0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f,
+-4.3711390e-08f, };
+#endif
+
+static const CELTMode mode48000_960_120 = {
+48000,  /* Fs */
+120,    /* overlap */
+21,     /* nbEBands */
+21,     /* effEBands */
+{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, },    /* preemph */
+eband5ms,       /* eBands */
+3,      /* maxLM */
+8,      /* nbShortMdcts */
+120,    /* shortMdctSize */
+11,     /* nbAllocVectors */
+band_allocation,        /* allocVectors */
+logN400,        /* logN */
+window120,      /* window */
+{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960},    /* mdct */
+{392, cache_index50, cache_bits50, cache_caps50},       /* cache */
+};
+
+/* List of all the available modes */
+#define TOTAL_MODES 1
+static const CELTMode * const static_mode_list[TOTAL_MODES] = {
+&mode48000_960_120,
+};
diff --git a/drivers/opus/celt/tests/test_unit_cwrs32.c b/drivers/opus/celt/tests/test_unit_cwrs32.c
new file mode 100644
index 0000000000..9cf124336a
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_cwrs32.c
@@ -0,0 +1,161 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
+                           Gregory Maxwell
+   Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#else
+#define TEST_CUSTOM_MODES
+#endif
+
+#define CELT_C
+#include "stack_alloc.h"
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "cwrs.c"
+#include "mathops.c"
+#include "rate.h"
+
+#define NMAX (240)
+#define KMAX (128)
+
+#ifdef TEST_CUSTOM_MODES
+
+#define NDIMS (44)
+static const int pn[NDIMS]={
+   2,   3,   4,   5,   6,   7,   8,   9,  10,
+  11,  12,  13,  14,  15,  16,  18,  20,  22,
+  24,  26,  28,  30,  32,  36,  40,  44,  48,
+  52,  56,  60,  64,  72,  80,  88,  96, 104,
+ 112, 120, 128, 144, 160, 176, 192, 208
+};
+static const int pkmax[NDIMS]={
+ 128, 128, 128, 128,  88,  52,  36,  26,  22,
+  18,  16,  15,  13,  12,  12,  11,  10,   9,
+   9,   8,   8,   7,   7,   7,   7,   6,   6,
+   6,   6,   6,   5,   5,   5,   5,   5,   5,
+   4,   4,   4,   4,   4,   4,   4,   4
+};
+
+#else /* TEST_CUSTOM_MODES */
+
+#define NDIMS (22)
+static const int pn[NDIMS]={
+   2,   3,   4,   6,   8,   9,  11,  12,  16,
+  18,  22,  24,  32,  36,  44,  48,  64,  72,
+  88,  96, 144, 176
+};
+static const int pkmax[NDIMS]={
+ 128, 128, 128,  88,  36,  26,  18,  16,  12,
+  11,   9,   9,   7,   7,   6,   6,   5,   5,
+   5,   5,   4,   4
+};
+
+#endif
+
+int main(void){
+  int t;
+  int n;
+  ALLOC_STACK;
+  for(t=0;t<NDIMS;t++){
+    int pseudo;
+    n=pn[t];
+    for(pseudo=1;pseudo<41;pseudo++)
+    {
+      int k;
+#if defined(SMALL_FOOTPRINT)
+      opus_uint32 uu[KMAX+2U];
+#endif
+      opus_uint32 inc;
+      opus_uint32 nc;
+      opus_uint32 i;
+      k=get_pulses(pseudo);
+      if (k>pkmax[t])break;
+      printf("Testing CWRS with N=%i, K=%i...\n",n,k);
+#if defined(SMALL_FOOTPRINT)
+      nc=ncwrs_urow(n,k,uu);
+#else
+      nc=CELT_PVQ_V(n,k);
+#endif
+      inc=nc/20000;
+      if(inc<1)inc=1;
+      for(i=0;i<nc;i+=inc){
+#if defined(SMALL_FOOTPRINT)
+        opus_uint32 u[KMAX+2U];
+#endif
+        int         y[NMAX];
+        int         sy;
+        opus_uint32 v;
+        opus_uint32 ii;
+        int         j;
+#if defined(SMALL_FOOTPRINT)
+        memcpy(u,uu,(k+2U)*sizeof(*u));
+        cwrsi(n,k,i,y,u);
+#else
+        cwrsi(n,k,i,y);
+#endif
+        sy=0;
+        for(j=0;j<n;j++)sy+=ABS(y[j]);
+        if(sy!=k){
+          fprintf(stderr,"N=%d Pulse count mismatch in cwrsi (%d!=%d).\n",
+           n,sy,k);
+          return 99;
+        }
+        /*printf("%6u of %u:",i,nc);
+        for(j=0;j<n;j++)printf(" %+3i",y[j]);
+        printf(" ->");*/
+#if defined(SMALL_FOOTPRINT)
+        ii=icwrs(n,k,&v,y,u);
+#else
+        ii=icwrs(n,y);
+        v=CELT_PVQ_V(n,k);
+#endif
+        if(ii!=i){
+          fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n",
+           (long)ii,(long)i);
+          return 1;
+        }
+        if(v!=nc){
+          fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n",
+           (long)v,(long)nc);
+          return 2;
+        }
+        /*printf(" %6u\n",i);*/
+      }
+      /*printf("\n");*/
+    }
+  }
+  return 0;
+}
diff --git a/drivers/opus/celt/tests/test_unit_dft.c b/drivers/opus/celt/tests/test_unit_dft.c
new file mode 100644
index 0000000000..4a00013b2a
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_dft.c
@@ -0,0 +1,164 @@
+/* Copyright (c) 2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define SKIP_CONFIG_H
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#include <stdio.h>
+
+#define CELT_C
+#include "stack_alloc.h"
+#include "kiss_fft.h"
+#include "kiss_fft.c"
+#include "mathops.c"
+#include "entcode.c"
+
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+int ret = 0;
+
+void check(kiss_fft_cpx  * in,kiss_fft_cpx  * out,int nfft,int isinverse)
+{
+    int bin,k;
+    double errpow=0,sigpow=0, snr;
+
+    for (bin=0;bin<nfft;++bin) {
+        double ansr = 0;
+        double ansi = 0;
+        double difr;
+        double difi;
+
+        for (k=0;k<nfft;++k) {
+            double phase = -2*M_PI*bin*k/nfft;
+            double re = cos(phase);
+            double im = sin(phase);
+            if (isinverse)
+                im = -im;
+
+            if (!isinverse)
+            {
+               re /= nfft;
+               im /= nfft;
+            }
+
+            ansr += in[k].r * re - in[k].i * im;
+            ansi += in[k].r * im + in[k].i * re;
+        }
+        /*printf ("%d %d ", (int)ansr, (int)ansi);*/
+        difr = ansr - out[bin].r;
+        difi = ansi - out[bin].i;
+        errpow += difr*difr + difi*difi;
+        sigpow += ansr*ansr+ansi*ansi;
+    }
+    snr = 10*log10(sigpow/errpow);
+    printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+    if (snr<60) {
+       printf( "** poor snr: %f ** \n", snr);
+       ret = 1;
+    }
+}
+
+void test1d(int nfft,int isinverse)
+{
+    size_t buflen = sizeof(kiss_fft_cpx)*nfft;
+
+    kiss_fft_cpx  * in = (kiss_fft_cpx*)malloc(buflen);
+    kiss_fft_cpx  * out= (kiss_fft_cpx*)malloc(buflen);
+    kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0);
+    int k;
+
+    for (k=0;k<nfft;++k) {
+        in[k].r = (rand() % 32767) - 16384;
+        in[k].i = (rand() % 32767) - 16384;
+    }
+
+    for (k=0;k<nfft;++k) {
+       in[k].r *= 32768;
+       in[k].i *= 32768;
+    }
+
+    if (isinverse)
+    {
+       for (k=0;k<nfft;++k) {
+          in[k].r /= nfft;
+          in[k].i /= nfft;
+       }
+    }
+
+    /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/
+
+    if (isinverse)
+       opus_ifft(cfg,in,out);
+    else
+       opus_fft(cfg,in,out);
+
+    /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
+
+    check(in,out,nfft,isinverse);
+
+    free(in);
+    free(out);
+    free(cfg);
+}
+
+int main(int argc,char ** argv)
+{
+    ALLOC_STACK;
+    if (argc>1) {
+        int k;
+        for (k=1;k<argc;++k) {
+            test1d(atoi(argv[k]),0);
+            test1d(atoi(argv[k]),1);
+        }
+    }else{
+        test1d(32,0);
+        test1d(32,1);
+        test1d(128,0);
+        test1d(128,1);
+        test1d(256,0);
+        test1d(256,1);
+#ifndef RADIX_TWO_ONLY
+        test1d(36,0);
+        test1d(36,1);
+        test1d(50,0);
+        test1d(50,1);
+        test1d(120,0);
+        test1d(120,1);
+#endif
+    }
+    return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_entropy.c b/drivers/opus/celt/tests/test_unit_entropy.c
new file mode 100644
index 0000000000..62268b1564
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_entropy.c
@@ -0,0 +1,382 @@
+/* Copyright (c) 2007-2011 Xiph.Org Foundation, Mozilla Corporation,
+                           Gregory Maxwell
+   Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include "entcode.h"
+#include "entenc.h"
+#include "entdec.h"
+#include <string.h>
+
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+
+#ifndef M_LOG2E
+# define M_LOG2E    1.4426950408889634074
+#endif
+#define DATA_SIZE 10000000
+#define DATA_SIZE2 10000
+
+int main(int _argc,char **_argv){
+  ec_enc         enc;
+  ec_dec         dec;
+  long           nbits;
+  long           nbits2;
+  double         entropy;
+  int            ft;
+  int            ftb;
+  int            sz;
+  int            i;
+  int            ret;
+  unsigned int   sym;
+  unsigned int   seed;
+  unsigned char *ptr;
+  const char    *env_seed;
+  ret=0;
+  entropy=0;
+    if (_argc > 2) {
+	fprintf(stderr, "Usage: %s [<seed>]\n", _argv[0]);
+	return 1;
+    }
+  env_seed = getenv("SEED");
+  if (_argc > 1)
+    seed = atoi(_argv[1]);
+  else if (env_seed)
+    seed = atoi(env_seed);
+  else
+    seed = time(NULL);
+  /*Testing encoding of raw bit values.*/
+  ptr = (unsigned char *)malloc(DATA_SIZE);
+  ec_enc_init(&enc,ptr, DATA_SIZE);
+  for(ft=2;ft<1024;ft++){
+    for(i=0;i<ft;i++){
+      entropy+=log(ft)*M_LOG2E;
+      ec_enc_uint(&enc,i,ft);
+    }
+  }
+  /*Testing encoding of raw bit values.*/
+  for(ftb=1;ftb<16;ftb++){
+    for(i=0;i<(1<<ftb);i++){
+      entropy+=ftb;
+      nbits=ec_tell(&enc);
+      ec_enc_bits(&enc,i,ftb);
+      nbits2=ec_tell(&enc);
+      if(nbits2-nbits!=ftb){
+        fprintf(stderr,"Used %li bits to encode %i bits directly.\n",
+         nbits2-nbits,ftb);
+        ret=-1;
+      }
+    }
+  }
+  nbits=ec_tell_frac(&enc);
+  ec_enc_done(&enc);
+  fprintf(stderr,
+   "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n",
+   entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits);
+  fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc));
+  ec_dec_init(&dec,ptr,DATA_SIZE);
+  for(ft=2;ft<1024;ft++){
+    for(i=0;i<ft;i++){
+      sym=ec_dec_uint(&dec,ft);
+      if(sym!=(unsigned)i){
+        fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft);
+        ret=-1;
+      }
+    }
+  }
+  for(ftb=1;ftb<16;ftb++){
+    for(i=0;i<(1<<ftb);i++){
+      sym=ec_dec_bits(&dec,ftb);
+      if(sym!=(unsigned)i){
+        fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb);
+        ret=-1;
+      }
+    }
+  }
+  nbits2=ec_tell_frac(&dec);
+  if(nbits!=nbits2){
+    fprintf(stderr,
+     "Reported number of bits used was %0.2lf, should be %0.2lf.\n",
+     ldexp(nbits2,-3),ldexp(nbits,-3));
+    ret=-1;
+  }
+  /*Testing an encoder bust prefers range coder data over raw bits.
+    This isn't a general guarantee, will only work for data that is buffered in
+     the encoder state and not yet stored in the user buffer, and should never
+     get used in practice.
+    It's mostly here for code coverage completeness.*/
+  /*Start with a 16-bit buffer.*/
+  ec_enc_init(&enc,ptr,2);
+  /*Write 7 raw bits.*/
+  ec_enc_bits(&enc,0x55,7);
+  /*Write 12.3 bits of range coder data.*/
+  ec_enc_uint(&enc,1,2);
+  ec_enc_uint(&enc,1,3);
+  ec_enc_uint(&enc,1,4);
+  ec_enc_uint(&enc,1,5);
+  ec_enc_uint(&enc,2,6);
+  ec_enc_uint(&enc,6,7);
+  ec_enc_done(&enc);
+  ec_dec_init(&dec,ptr,2);
+  if(!enc.error
+   /*The raw bits should have been overwritten by the range coder data.*/
+   ||ec_dec_bits(&dec,7)!=0x05
+   /*And all the range coder data should have been encoded correctly.*/
+   ||ec_dec_uint(&dec,2)!=1
+   ||ec_dec_uint(&dec,3)!=1
+   ||ec_dec_uint(&dec,4)!=1
+   ||ec_dec_uint(&dec,5)!=1
+   ||ec_dec_uint(&dec,6)!=2
+   ||ec_dec_uint(&dec,7)!=6){
+    fprintf(stderr,"Encoder bust overwrote range coder data with raw bits.\n");
+    ret=-1;
+  }
+  srand(seed);
+  fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536);
+  for(i=0;i<409600;i++){
+    unsigned *data;
+    unsigned *tell;
+    unsigned tell_bits;
+    int       j;
+    int zeros;
+    ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10;
+    sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
+    data=(unsigned *)malloc(sz*sizeof(*data));
+    tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
+    ec_enc_init(&enc,ptr,DATA_SIZE2);
+    zeros = rand()%13==0;
+    tell[0]=ec_tell_frac(&enc);
+    for(j=0;j<sz;j++){
+      if (zeros)
+        data[j]=0;
+      else
+        data[j]=rand()%ft;
+      ec_enc_uint(&enc,data[j],ft);
+      tell[j+1]=ec_tell_frac(&enc);
+    }
+    if (rand()%2==0)
+      while(ec_tell(&enc)%8 != 0)
+        ec_enc_uint(&enc, rand()%2, 2);
+    tell_bits = ec_tell(&enc);
+    ec_enc_done(&enc);
+    if(tell_bits!=(unsigned)ec_tell(&enc)){
+      fprintf(stderr,"ec_tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n",
+       ec_tell(&enc),tell_bits,seed);
+      ret=-1;
+    }
+    if ((tell_bits+7)/8 < ec_range_bytes(&enc))
+    {
+      fprintf (stderr, "ec_tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
+               ec_range_bytes(&enc), (tell_bits+7)/8,seed);
+      ret=-1;
+    }
+    ec_dec_init(&dec,ptr,DATA_SIZE2);
+    if(ec_tell_frac(&dec)!=tell[0]){
+      fprintf(stderr,
+       "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+       0,ec_tell_frac(&dec),tell[0],seed);
+    }
+    for(j=0;j<sz;j++){
+      sym=ec_dec_uint(&dec,ft);
+      if(sym!=data[j]){
+        fprintf(stderr,
+         "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n",
+         sym,data[j],ft,j,sz,seed);
+        ret=-1;
+      }
+      if(ec_tell_frac(&dec)!=tell[j+1]){
+        fprintf(stderr,
+         "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+         j+1,ec_tell_frac(&dec),tell[j+1],seed);
+      }
+    }
+    free(tell);
+    free(data);
+  }
+  /*Test compatibility between multiple different encode/decode routines.*/
+  for(i=0;i<409600;i++){
+    unsigned *logp1;
+    unsigned *data;
+    unsigned *tell;
+    unsigned *enc_method;
+    int       j;
+    sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
+    logp1=(unsigned *)malloc(sz*sizeof(*logp1));
+    data=(unsigned *)malloc(sz*sizeof(*data));
+    tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
+    enc_method=(unsigned *)malloc(sz*sizeof(*enc_method));
+    ec_enc_init(&enc,ptr,DATA_SIZE2);
+    tell[0]=ec_tell_frac(&enc);
+    for(j=0;j<sz;j++){
+      data[j]=rand()/((RAND_MAX>>1)+1);
+      logp1[j]=(rand()%15)+1;
+      enc_method[j]=rand()/((RAND_MAX>>2)+1);
+      switch(enc_method[j]){
+        case 0:{
+          ec_encode(&enc,data[j]?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(data[j]?0:1),1<<logp1[j]);
+        }break;
+        case 1:{
+          ec_encode_bin(&enc,data[j]?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(data[j]?0:1),logp1[j]);
+        }break;
+        case 2:{
+          ec_enc_bit_logp(&enc,data[j],logp1[j]);
+        }break;
+        case 3:{
+          unsigned char icdf[2];
+          icdf[0]=1;
+          icdf[1]=0;
+          ec_enc_icdf(&enc,data[j],icdf,logp1[j]);
+        }break;
+      }
+      tell[j+1]=ec_tell_frac(&enc);
+    }
+    ec_enc_done(&enc);
+    if((ec_tell(&enc)+7U)/8U<ec_range_bytes(&enc)){
+      fprintf(stderr,"tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
+       ec_range_bytes(&enc),(ec_tell(&enc)+7)/8,seed);
+      ret=-1;
+    }
+    ec_dec_init(&dec,ptr,DATA_SIZE2);
+    if(ec_tell_frac(&dec)!=tell[0]){
+      fprintf(stderr,
+       "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+       0,ec_tell_frac(&dec),tell[0],seed);
+    }
+    for(j=0;j<sz;j++){
+      int fs;
+      int dec_method;
+      dec_method=rand()/((RAND_MAX>>2)+1);
+      switch(dec_method){
+        case 0:{
+          fs=ec_decode(&dec,1<<logp1[j]);
+          sym=fs>=(1<<logp1[j])-1;
+          ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
+        }break;
+        case 1:{
+          fs=ec_decode_bin(&dec,logp1[j]);
+          sym=fs>=(1<<logp1[j])-1;
+          ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
+           (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
+        }break;
+        case 2:{
+          sym=ec_dec_bit_logp(&dec,logp1[j]);
+        }break;
+        case 3:{
+          unsigned char icdf[2];
+          icdf[0]=1;
+          icdf[1]=0;
+          sym=ec_dec_icdf(&dec,icdf,logp1[j]);
+        }break;
+      }
+      if(sym!=data[j]){
+        fprintf(stderr,
+         "Decoded %i instead of %i with logp1 of %i at position %i of %i (Random seed: %u).\n",
+         sym,data[j],logp1[j],j,sz,seed);
+        fprintf(stderr,"Encoding method: %i, decoding method: %i\n",
+         enc_method[j],dec_method);
+        ret=-1;
+      }
+      if(ec_tell_frac(&dec)!=tell[j+1]){
+        fprintf(stderr,
+         "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
+         j+1,ec_tell_frac(&dec),tell[j+1],seed);
+      }
+    }
+    free(enc_method);
+    free(tell);
+    free(data);
+    free(logp1);
+  }
+  ec_enc_init(&enc,ptr,DATA_SIZE2);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,2);
+  ec_enc_patch_initial_bits(&enc,3,2);
+  if(enc.error){
+    fprintf(stderr,"patch_initial_bits failed");
+    ret=-1;
+  }
+  ec_enc_patch_initial_bits(&enc,0,5);
+  if(!enc.error){
+    fprintf(stderr,"patch_initial_bits didn't fail when it should have");
+    ret=-1;
+  }
+  ec_enc_done(&enc);
+  if(ec_range_bytes(&enc)!=1||ptr[0]!=192){
+    fprintf(stderr,"Got %d when expecting 192 for patch_initial_bits",ptr[0]);
+    ret=-1;
+  }
+  ec_enc_init(&enc,ptr,DATA_SIZE2);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,0,1);
+  ec_enc_bit_logp(&enc,1,6);
+  ec_enc_bit_logp(&enc,0,2);
+  ec_enc_patch_initial_bits(&enc,0,2);
+  if(enc.error){
+    fprintf(stderr,"patch_initial_bits failed");
+    ret=-1;
+  }
+  ec_enc_done(&enc);
+  if(ec_range_bytes(&enc)!=2||ptr[0]!=63){
+    fprintf(stderr,"Got %d when expecting 63 for patch_initial_bits",ptr[0]);
+    ret=-1;
+  }
+  ec_enc_init(&enc,ptr,2);
+  ec_enc_bit_logp(&enc,0,2);
+  for(i=0;i<48;i++){
+    ec_enc_bits(&enc,0,1);
+  }
+  ec_enc_done(&enc);
+  if(!enc.error){
+    fprintf(stderr,"Raw bits overfill didn't fail when it should have");
+    ret=-1;
+  }
+  ec_enc_init(&enc,ptr,2);
+  for(i=0;i<17;i++){
+    ec_enc_bits(&enc,0,1);
+  }
+  ec_enc_done(&enc);
+  if(!enc.error){
+    fprintf(stderr,"17 raw bits encoded in two bytes");
+    ret=-1;
+  }
+  free(ptr);
+  return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_laplace.c b/drivers/opus/celt/tests/test_unit_laplace.c
new file mode 100644
index 0000000000..af7d471045
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_laplace.c
@@ -0,0 +1,92 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation
+   Written by Jean-Marc Valin and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "laplace.h"
+#define CELT_C
+#include "stack_alloc.h"
+
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "laplace.c"
+
+#define DATA_SIZE 40000
+
+int ec_laplace_get_start_freq(int decay)
+{
+   opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1);
+   int fs = (ft*(16384-decay))/(16384+decay);
+   return fs+LAPLACE_MINP;
+}
+
+int main(void)
+{
+   int i;
+   int ret = 0;
+   ec_enc enc;
+   ec_dec dec;
+   unsigned char *ptr;
+   int val[10000], decay[10000];
+   ALLOC_STACK;
+   ptr = (unsigned char *)malloc(DATA_SIZE);
+   ec_enc_init(&enc,ptr,DATA_SIZE);
+
+   val[0] = 3; decay[0] = 6000;
+   val[1] = 0; decay[1] = 5800;
+   val[2] = -1; decay[2] = 5600;
+   for (i=3;i<10000;i++)
+   {
+      val[i] = rand()%15-7;
+      decay[i] = rand()%11000+5000;
+   }
+   for (i=0;i<10000;i++)
+      ec_laplace_encode(&enc, &val[i],
+            ec_laplace_get_start_freq(decay[i]), decay[i]);
+
+   ec_enc_done(&enc);
+
+   ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));
+
+   for (i=0;i<10000;i++)
+   {
+      int d = ec_laplace_decode(&dec,
+            ec_laplace_get_start_freq(decay[i]), decay[i]);
+      if (d != val[i])
+      {
+         fprintf (stderr, "Got %d instead of %d\n", d, val[i]);
+         ret = 1;
+      }
+   }
+
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_mathops.c b/drivers/opus/celt/tests/test_unit_mathops.c
new file mode 100644
index 0000000000..36d6a4bfb4
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_mathops.c
@@ -0,0 +1,275 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
+                           Gregory Maxwell
+   Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#define CELT_C
+
+#include "mathops.c"
+#include "entenc.c"
+#include "entdec.c"
+#include "entcode.c"
+#include "bands.c"
+#include "quant_bands.c"
+#include "laplace.c"
+#include "vq.c"
+#include "cwrs.c"
+#include <stdio.h>
+#include <math.h>
+
+#ifdef OPUS_FIXED_POINT
+#define WORD "%d"
+#else
+#define WORD "%f"
+#endif
+
+int ret = 0;
+
+void testdiv(void)
+{
+   opus_int32 i;
+   for (i=1;i<=327670;i++)
+   {
+      double prod;
+      opus_val32 val;
+      val = celt_rcp(i);
+#ifdef OPUS_FIXED_POINT
+      prod = (1./32768./65526.)*val*i;
+#else
+      prod = val*i;
+#endif
+      if (fabs(prod-1) > .00025)
+      {
+         fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod);
+         ret = 1;
+      }
+   }
+}
+
+void testsqrt(void)
+{
+   opus_int32 i;
+   for (i=1;i<=1000000000;i++)
+   {
+      double ratio;
+      opus_val16 val;
+      val = celt_sqrt(i);
+      ratio = val/sqrt(i);
+      if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2)
+      {
+         fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
+         ret = 1;
+      }
+      i+= i>>10;
+   }
+}
+
+void testbitexactcos(void)
+{
+   int i;
+   opus_int32 min_d,max_d,last,chk;
+   chk=max_d=0;
+   last=min_d=32767;
+   for(i=64;i<=16320;i++)
+   {
+      opus_int32 d;
+      opus_int32 q=bitexact_cos(i);
+      chk ^= q*i;
+      d = last - q;
+      if (d>max_d)max_d=d;
+      if (d<min_d)min_d=d;
+      last = q;
+   }
+   if ((chk!=89408644)||(max_d!=5)||(min_d!=0)||(bitexact_cos(64)!=32767)||
+       (bitexact_cos(16320)!=200)||(bitexact_cos(8192)!=23171))
+   {
+      fprintf (stderr, "bitexact_cos failed\n");
+      ret = 1;
+   }
+}
+
+void testbitexactlog2tan(void)
+{
+   int i,fail;
+   opus_int32 min_d,max_d,last,chk;
+   fail=chk=max_d=0;
+   last=min_d=15059;
+   for(i=64;i<8193;i++)
+   {
+      opus_int32 d;
+      opus_int32 mid=bitexact_cos(i);
+      opus_int32 side=bitexact_cos(16384-i);
+      opus_int32 q=bitexact_log2tan(mid,side);
+      chk ^= q*i;
+      d = last - q;
+      if (q!=-1*bitexact_log2tan(side,mid))
+        fail = 1;
+      if (d>max_d)max_d=d;
+      if (d<min_d)min_d=d;
+      last = q;
+   }
+   if ((chk!=15821257)||(max_d!=61)||(min_d!=-2)||fail||
+       (bitexact_log2tan(32767,200)!=15059)||(bitexact_log2tan(30274,12540)!=2611)||
+       (bitexact_log2tan(23171,23171)!=0))
+   {
+      fprintf (stderr, "bitexact_log2tan failed\n");
+      ret = 1;
+   }
+}
+
+#ifndef OPUS_FIXED_POINT
+void testlog2(void)
+{
+   float x;
+   for (x=0.001;x<1677700.0;x+=(x/8.0))
+   {
+      float error = fabs((1.442695040888963387*log(x))-celt_log2(x));
+      if (error>0.0009)
+      {
+         fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2(void)
+{
+   float x;
+   for (x=-11.0;x<24.0;x+=0.0007)
+   {
+      float error = fabs(x-(1.442695040888963387*log(celt_exp2(x))));
+      if (error>0.0002)
+      {
+         fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2log2(void)
+{
+   float x;
+   for (x=-11.0;x<24.0;x+=0.0007)
+   {
+      float error = fabs(x-(celt_log2(celt_exp2(x))));
+      if (error>0.001)
+      {
+         fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error);
+         ret = 1;
+      }
+   }
+}
+#else
+void testlog2(void)
+{
+   opus_val32 x;
+   for (x=8;x<1073741824;x+=(x>>3))
+   {
+      float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0);
+      if (error>0.003)
+      {
+         fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2(void)
+{
+   opus_val16 x;
+   for (x=-32768;x<15360;x++)
+   {
+      float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0)));
+      float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0);
+      if (error1>0.0002&&error2>0.00004)
+      {
+    	 fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2);
+         ret = 1;
+      }
+   }
+}
+
+void testexp2log2(void)
+{
+   opus_val32 x;
+   for (x=8;x<65536;x+=(x>>3))
+   {
+      float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384;
+      if (error>0.004)
+      {
+         fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error);
+         ret = 1;
+      }
+   }
+}
+
+void testilog2(void)
+{
+   opus_val32 x;
+   for (x=1;x<=268435455;x+=127)
+   {
+      opus_val32 lg;
+      opus_val32 y;
+
+      lg = celt_ilog2(x);
+      if (lg<0 || lg>=31)
+      {
+         printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg);
+         ret = 1;
+      }
+      y = 1<<lg;
+
+      if (x<y || (x>>1)>=y)
+      {
+         printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y);
+         ret = 1;
+      }
+   }
+}
+#endif
+
+int main(void)
+{
+   testbitexactcos();
+   testbitexactlog2tan();
+   testdiv();
+   testsqrt();
+   testlog2();
+   testexp2();
+   testexp2log2();
+#ifdef OPUS_FIXED_POINT
+   testilog2();
+#endif
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_mdct.c b/drivers/opus/celt/tests/test_unit_mdct.c
new file mode 100644
index 0000000000..e3b5eec11c
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_mdct.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#define SKIP_CONFIG_H
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#include <stdio.h>
+
+#define CELT_C
+#include "mdct.h"
+#include "stack_alloc.h"
+
+#include "kiss_fft.c"
+#include "mdct.c"
+#include "mathops.c"
+#include "entcode.c"
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+int ret = 0;
+void check(kiss_fft_scalar  * in,kiss_fft_scalar  * out,int nfft,int isinverse)
+{
+    int bin,k;
+    double errpow=0,sigpow=0;
+    double snr;
+    for (bin=0;bin<nfft/2;++bin) {
+        double ansr = 0;
+        double difr;
+
+        for (k=0;k<nfft;++k) {
+           double phase = 2*M_PI*(k+.5+.25*nfft)*(bin+.5)/nfft;
+           double re = cos(phase);
+
+           re /= nfft/4;
+
+           ansr += in[k] * re;
+        }
+        /*printf ("%f %f\n", ansr, out[bin]);*/
+        difr = ansr - out[bin];
+        errpow += difr*difr;
+        sigpow += ansr*ansr;
+    }
+    snr = 10*log10(sigpow/errpow);
+    printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+    if (snr<60) {
+       printf( "** poor snr: %f **\n", snr);
+       ret = 1;
+    }
+}
+
+void check_inv(kiss_fft_scalar  * in,kiss_fft_scalar  * out,int nfft,int isinverse)
+{
+   int bin,k;
+   double errpow=0,sigpow=0;
+   double snr;
+   for (bin=0;bin<nfft;++bin) {
+      double ansr = 0;
+      double difr;
+
+      for (k=0;k<nfft/2;++k) {
+         double phase = 2*M_PI*(bin+.5+.25*nfft)*(k+.5)/nfft;
+         double re = cos(phase);
+
+         /*re *= 2;*/
+
+         ansr += in[k] * re;
+      }
+      /*printf ("%f %f\n", ansr, out[bin]);*/
+      difr = ansr - out[bin];
+      errpow += difr*difr;
+      sigpow += ansr*ansr;
+   }
+   snr = 10*log10(sigpow/errpow);
+   printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
+   if (snr<60) {
+      printf( "** poor snr: %f **\n", snr);
+      ret = 1;
+   }
+}
+
+
+void test1d(int nfft,int isinverse)
+{
+    celt_mdct_lookup cfg;
+    size_t buflen = sizeof(kiss_fft_scalar)*nfft;
+
+    kiss_fft_scalar  * in = (kiss_fft_scalar*)malloc(buflen);
+    kiss_fft_scalar  * in_copy = (kiss_fft_scalar*)malloc(buflen);
+    kiss_fft_scalar  * out= (kiss_fft_scalar*)malloc(buflen);
+    opus_val16  * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
+    int k;
+
+    clt_mdct_init(&cfg, nfft, 0);
+    for (k=0;k<nfft;++k) {
+        in[k] = (rand() % 32768) - 16384;
+    }
+
+    for (k=0;k<nfft/2;++k) {
+       window[k] = Q15ONE;
+    }
+    for (k=0;k<nfft;++k) {
+       in[k] *= 32768;
+    }
+
+    if (isinverse)
+    {
+       for (k=0;k<nfft;++k) {
+          in[k] /= nfft;
+       }
+    }
+
+    for (k=0;k<nfft;++k)
+       in_copy[k] = in[k];
+    /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/
+
+    if (isinverse)
+    {
+       for (k=0;k<nfft;++k)
+          out[k] = 0;
+       clt_mdct_backward(&cfg,in,out, window, nfft/2, 0, 1);
+       /* apply TDAC because clt_mdct_backward() no longer does that */
+       for (k=0;k<nfft/4;++k)
+          out[nfft-k-1] = out[nfft/2+k];
+       check_inv(in,out,nfft,isinverse);
+    } else {
+       clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1);
+       check(in_copy,out,nfft,isinverse);
+    }
+    /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
+
+
+    free(in);
+    free(out);
+    clt_mdct_clear(&cfg);
+}
+
+int main(int argc,char ** argv)
+{
+    ALLOC_STACK;
+    if (argc>1) {
+        int k;
+        for (k=1;k<argc;++k) {
+            test1d(atoi(argv[k]),0);
+            test1d(atoi(argv[k]),1);
+        }
+    }else{
+        test1d(32,0);
+        test1d(32,1);
+        test1d(256,0);
+        test1d(256,1);
+        test1d(512,0);
+        test1d(512,1);
+        test1d(1024,0);
+        test1d(1024,1);
+        test1d(2048,0);
+        test1d(2048,1);
+#ifndef RADIX_TWO_ONLY
+        test1d(36,0);
+        test1d(36,1);
+        test1d(40,0);
+        test1d(40,1);
+        test1d(60,0);
+        test1d(60,1);
+        test1d(120,0);
+        test1d(120,1);
+        test1d(240,0);
+        test1d(240,1);
+        test1d(480,0);
+        test1d(480,1);
+        test1d(960,0);
+        test1d(960,1);
+        test1d(1920,0);
+        test1d(1920,1);
+#endif
+    }
+    return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_rotation.c b/drivers/opus/celt/tests/test_unit_rotation.c
new file mode 100644
index 0000000000..c12cc3f02f
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_rotation.c
@@ -0,0 +1,90 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#ifndef CUSTOM_MODES
+#define CUSTOM_MODES
+#endif
+
+#define CELT_C
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "vq.c"
+#include "cwrs.c"
+#include "entcode.c"
+#include "entenc.c"
+#include "entdec.c"
+#include "mathops.c"
+#include "bands.h"
+#include <math.h>
+#define MAX_SIZE 100
+
+int ret=0;
+void test_rotation(int N, int K)
+{
+   int i;
+   double err = 0, ener = 0, snr, snr0;
+   opus_val16 x0[MAX_SIZE];
+   opus_val16 x1[MAX_SIZE];
+   for (i=0;i<N;i++)
+      x1[i] = x0[i] = rand()%32767-16384;
+   exp_rotation(x1, N, 1, 1, K, SPREAD_NORMAL);
+   for (i=0;i<N;i++)
+   {
+      err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]);
+      ener += x0[i]*(double)x0[i];
+   }
+   snr0 = 20*log10(ener/err);
+   err = ener = 0;
+   exp_rotation(x1, N, -1, 1, K, SPREAD_NORMAL);
+   for (i=0;i<N;i++)
+   {
+      err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]);
+      ener += x0[i]*(double)x0[i];
+   }
+   snr = 20*log10(ener/err);
+   printf ("SNR for size %d (%d pulses) is %f (was %f without inverse)\n", N, K, snr, snr0);
+   if (snr < 60 || snr0 > 20)
+   {
+      fprintf(stderr, "FAIL!\n");
+      ret = 1;
+   }
+}
+
+int main(void)
+{
+   ALLOC_STACK;
+   test_rotation(15, 3);
+   test_rotation(23, 5);
+   test_rotation(50, 3);
+   test_rotation(80, 1);
+   return ret;
+}
diff --git a/drivers/opus/celt/tests/test_unit_types.c b/drivers/opus/celt/tests/test_unit_types.c
new file mode 100644
index 0000000000..29e671067f
--- /dev/null
+++ b/drivers/opus/celt/tests/test_unit_types.c
@@ -0,0 +1,50 @@
+/* Copyright (c) 2008-2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_types.h"
+#include <stdio.h>
+
+int main(void)
+{
+   opus_int16 i = 1;
+   i <<= 14;
+   if (i>>14 != 1)
+   {
+      fprintf(stderr, "opus_int16 isn't 16 bits\n");
+      return 1;
+   }
+   if (sizeof(opus_int16)*2 != sizeof(opus_int32))
+   {
+      fprintf(stderr, "16*2 != 32\n");
+      return 1;
+   }
+   return 0;
+}
diff --git a/drivers/opus/celt/vq.c b/drivers/opus/celt/vq.c
new file mode 100644
index 0000000000..20b0b82728
--- /dev/null
+++ b/drivers/opus/celt/vq.c
@@ -0,0 +1,415 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "mathops.h"
+#include "cwrs.h"
+#include "vq.h"
+#include "arch.h"
+#include "os_support.h"
+#include "bands.h"
+#include "rate.h"
+
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+   int i;
+   celt_norm *Xptr;
+   Xptr = X;
+   for (i=0;i<len-stride;i++)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      *Xptr++      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+   }
+   Xptr = &X[len-2*stride-1];
+   for (i=len-2*stride-1;i>=0;i--)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      *Xptr--      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+   }
+}
+
+static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
+{
+   static const int SPREAD_FACTOR[3]={15,10,5};
+   int i;
+   opus_val16 c, s;
+   opus_val16 gain, theta;
+   int stride2=0;
+   int factor;
+
+   if (2*K>=len || spread==SPREAD_NONE)
+      return;
+   factor = SPREAD_FACTOR[spread-1];
+
+   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
+   theta = HALF16(MULT16_16_Q15(gain,gain));
+
+   c = celt_cos_norm(EXTEND32(theta));
+   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
+
+   if (len>=8*stride)
+   {
+      stride2 = 1;
+      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
+         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
+      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
+         stride2++;
+   }
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      extract_collapse_mask().*/
+   len /= stride;
+   for (i=0;i<stride;i++)
+   {
+      if (dir < 0)
+      {
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, c);
+         exp_rotation1(X+i*len, len, 1, c, s);
+      } else {
+         exp_rotation1(X+i*len, len, 1, c, -s);
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, -c);
+      }
+   }
+}
+
+/** Takes the pitch vector and the decoded residual vector, computes the gain
+    that will give ||p+g*y||=1 and mixes the residual with the pitch. */
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
+      int N, opus_val32 Ryy, opus_val16 gain)
+{
+   int i;
+#ifdef OPUS_FIXED_POINT
+   int k;
+#endif
+   opus_val32 t;
+   opus_val16 g;
+
+#ifdef OPUS_FIXED_POINT
+   k = celt_ilog2(Ryy)>>1;
+#endif
+   t = VSHR32(Ryy, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   i=0;
+   do
+      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
+   while (++i < N);
+}
+
+static unsigned extract_collapse_mask(int *iy, int N, int B)
+{
+   unsigned collapse_mask;
+   int N0;
+   int i;
+   if (B<=1)
+      return 1;
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      exp_rotation().*/
+   N0 = N/B;
+   collapse_mask = 0;
+   i=0; do {
+      int j;
+      j=0; do {
+         collapse_mask |= (iy[i*N0+j]!=0)<<i;
+      } while (++j<N0);
+   } while (++i<B);
+   return collapse_mask;
+}
+
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
+#ifdef RESYNTH
+   , opus_val16 gain
+#endif
+   )
+{
+   VARDECL(celt_norm, y);
+   VARDECL(int, iy);
+   VARDECL(opus_val16, signx);
+   int i, j;
+   opus_val16 s;
+   int pulsesLeft;
+   opus_val32 sum;
+   opus_val32 xy;
+   opus_val16 yy;
+   unsigned collapse_mask;
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_quant() needs at least one pulse");
+   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
+
+   ALLOC(y, N, celt_norm);
+   ALLOC(iy, N, int);
+   ALLOC(signx, N, opus_val16);
+
+   exp_rotation(X, N, 1, B, K, spread);
+
+   /* Get rid of the sign */
+   sum = 0;
+   j=0; do {
+      if (X[j]>0)
+         signx[j]=1;
+      else {
+         signx[j]=-1;
+         X[j]=-X[j];
+      }
+      iy[j] = 0;
+      y[j] = 0;
+   } while (++j<N);
+
+   xy = yy = 0;
+
+   pulsesLeft = K;
+
+   /* Do a pre-search by projecting on the pyramid */
+   if (K > (N>>1))
+   {
+      opus_val16 rcp;
+      j=0; do {
+         sum += X[j];
+      }  while (++j<N);
+
+      /* If X is too small, just replace it with a pulse at 0 */
+#ifdef OPUS_FIXED_POINT
+      if (sum <= K)
+#else
+      /* Prevents infinities and NaNs from causing too many pulses
+         to be allocated. 64 is an approximation of infinity here. */
+      if (!(sum > EPSILON && sum < 64))
+#endif
+      {
+         X[0] = QCONST16(1.f,14);
+         j=1; do
+            X[j]=0;
+         while (++j<N);
+         sum = QCONST16(1.f,14);
+      }
+      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
+      j=0; do {
+#ifdef OPUS_FIXED_POINT
+         /* It's really important to round *towards zero* here */
+         iy[j] = MULT16_16_Q15(X[j],rcp);
+#else
+         iy[j] = (int)floor(rcp*X[j]);
+#endif
+         y[j] = (celt_norm)iy[j];
+         yy = MAC16_16(yy, y[j],y[j]);
+         xy = MAC16_16(xy, X[j],y[j]);
+         y[j] *= 2;
+         pulsesLeft -= iy[j];
+      }  while (++j<N);
+   }
+   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
+
+   /* This should never happen, but just in case it does (e.g. on silence)
+      we fill the first bin with pulses. */
+#ifdef OPUS_FIXED_POINT_DEBUG
+   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
+#endif
+   if (pulsesLeft > N+3)
+   {
+      opus_val16 tmp = (opus_val16)pulsesLeft;
+      yy = MAC16_16(yy, tmp, tmp);
+      yy = MAC16_16(yy, tmp, y[0]);
+      iy[0] += pulsesLeft;
+      pulsesLeft=0;
+   }
+
+   s = 1;
+   for (i=0;i<pulsesLeft;i++)
+   {
+      int best_id;
+      opus_val32 best_num = -VERY_LARGE16;
+      opus_val16 best_den = 0;
+#ifdef OPUS_FIXED_POINT
+      int rshift;
+#endif
+#ifdef OPUS_FIXED_POINT
+      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
+#endif
+      best_id = 0;
+      /* The squared magnitude term gets added anyway, so we might as well
+         add it outside the loop */
+      yy = ADD32(yy, 1);
+      j=0;
+      do {
+         opus_val16 Rxy, Ryy;
+         /* Temporary sums of the new pulse(s) */
+         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
+         /* We're multiplying y[j] by two so we don't have to do it here */
+         Ryy = ADD16(yy, y[j]);
+
+         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
+            Rxy is positive because the sign is pre-computed) */
+         Rxy = MULT16_16_Q15(Rxy,Rxy);
+         /* The idea is to check for num/den >= best_num/best_den, but that way
+            we can do it without any division */
+         /* OPT: Make sure to use conditional moves here */
+         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
+         {
+            best_den = Ryy;
+            best_num = Rxy;
+            best_id = j;
+         }
+      } while (++j<N);
+
+      /* Updating the sums of the new pulse(s) */
+      xy = ADD32(xy, EXTEND32(X[best_id]));
+      /* We're multiplying y[j] by two so we don't have to do it here */
+      yy = ADD16(yy, y[best_id]);
+
+      /* Only now that we've made the final choice, update y/iy */
+      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
+      y[best_id] += 2*s;
+      iy[best_id]++;
+   }
+
+   /* Put the original sign back */
+   j=0;
+   do {
+      X[j] = MULT16_16(signx[j],X[j]);
+      if (signx[j] < 0)
+         iy[j] = -iy[j];
+   } while (++j<N);
+   encode_pulses(iy, N, K, enc);
+
+#ifdef RESYNTH
+   normalise_residual(iy, X, N, yy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+#endif
+
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+/** Decode pulse vector and combine the result with the pitch vector to produce
+    the final normalised signal in the current band. */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain)
+{
+   int i;
+   opus_val32 Ryy;
+   unsigned collapse_mask;
+   VARDECL(int, iy);
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
+   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
+   ALLOC(iy, N, int);
+   decode_pulses(iy, N, K, dec);
+   Ryy = 0;
+   i=0;
+   do {
+      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
+   } while (++i < N);
+   normalise_residual(iy, X, N, Ryy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
+{
+   int i;
+#ifdef OPUS_FIXED_POINT
+   int k;
+#endif
+   opus_val32 E = EPSILON;
+   opus_val16 g;
+   opus_val32 t;
+   celt_norm *xptr = X;
+   for (i=0;i<N;i++)
+   {
+      E = MAC16_16(E, *xptr, *xptr);
+      xptr++;
+   }
+#ifdef OPUS_FIXED_POINT
+   k = celt_ilog2(E)>>1;
+#endif
+   t = VSHR32(E, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   xptr = X;
+   for (i=0;i<N;i++)
+   {
+      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+      xptr++;
+   }
+   /*return celt_sqrt(E);*/
+}
+
+int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
+{
+   int i;
+   int itheta;
+   opus_val16 mid, side;
+   opus_val32 Emid, Eside;
+
+   Emid = Eside = EPSILON;
+   if (stereo)
+   {
+      for (i=0;i<N;i++)
+      {
+         celt_norm m, s;
+         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
+         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
+         Emid = MAC16_16(Emid, m, m);
+         Eside = MAC16_16(Eside, s, s);
+      }
+   } else {
+      for (i=0;i<N;i++)
+      {
+         celt_norm m, s;
+         m = X[i];
+         s = Y[i];
+         Emid = MAC16_16(Emid, m, m);
+         Eside = MAC16_16(Eside, s, s);
+      }
+   }
+   mid = celt_sqrt(Emid);
+   side = celt_sqrt(Eside);
+#ifdef OPUS_FIXED_POINT
+   /* 0.63662 = 2/pi */
+   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
+#else
+   itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
+#endif
+
+   return itheta;
+}
diff --git a/drivers/opus/celt/vq.h b/drivers/opus/celt/vq.h
new file mode 100644
index 0000000000..8bab59c5e0
--- /dev/null
+++ b/drivers/opus/celt/vq.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/**
+   @file vq.h
+   @brief Vector quantisation of the residual
+ */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef VQ_H
+#define VQ_H
+
+#include "entenc.h"
+#include "entdec.h"
+#include "opus_modes.h"
+
+/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
+  * the pitch and a combination of pulses such that its norm is still equal
+  * to 1. This is the function that will typically require the most CPU.
+ * @param X Residual signal to quantise/encode (returns quantised version)
+ * @param N Number of samples to encode
+ * @param K Number of pulses to use
+ * @param enc Entropy encoder state
+ * @ret A mask indicating which blocks in the band received pulses
+*/
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
+      ec_enc *enc
+#ifdef RESYNTH
+      , opus_val16 gain
+#endif
+      );
+
+/** Algebraic pulse decoder
+ * @param X Decoded normalised spectrum (returned)
+ * @param N Number of samples to decode
+ * @param K Number of pulses to use
+ * @param dec Entropy decoder state
+ * @ret A mask indicating which blocks in the band received pulses
+ */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain);
+
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain);
+
+int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N);
+
+#endif /* VQ_H */
diff --git a/drivers/opus/celt/x86/pitch_sse.h b/drivers/opus/celt/x86/pitch_sse.h
new file mode 100644
index 0000000000..695122a5ad
--- /dev/null
+++ b/drivers/opus/celt/x86/pitch_sse.h
@@ -0,0 +1,156 @@
+/* Copyright (c) 2013 Jean-Marc Valin and John Ridges */
+/**
+   @file pitch_sse.h
+   @brief Pitch analysis
+ */
+
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_SSE_H
+#define PITCH_SSE_H
+
+#include <xmmintrin.h>
+#include "arch.h"
+
+#define OVERRIDE_XCORR_KERNEL
+static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
+{
+   int j;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_loadu_ps(sum);
+   xsum2 = _mm_setzero_ps();
+
+   for (j = 0; j < len-3; j += 4)
+   {
+      __m128 x0 = _mm_loadu_ps(x+j);
+      __m128 yj = _mm_loadu_ps(y+j);
+      __m128 y3 = _mm_loadu_ps(y+j+3);
+
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
+                                          _mm_shuffle_ps(yj,y3,0x49)));
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
+                                          _mm_shuffle_ps(yj,y3,0x9e)));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
+   }
+   if (j < len)
+   {
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+      if (++j < len)
+      {
+         xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         if (++j < len)
+         {
+            xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
+         }
+      }
+   }
+   _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
+}
+
+#define OVERRIDE_DUAL_INNER_PROD
+static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   __m128 xsum1, xsum2;
+   xsum1 = _mm_setzero_ps();
+   xsum2 = _mm_setzero_ps();
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 xi = _mm_loadu_ps(x+i);
+      __m128 y1i = _mm_loadu_ps(y01+i);
+      __m128 y2i = _mm_loadu_ps(y02+i);
+      xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
+      xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
+   }
+   /* Horizontal sum */
+   xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
+   xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
+   _mm_store_ss(xy1, xsum1);
+   xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
+   xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
+   _mm_store_ss(xy2, xsum2);
+   for (;i<N;i++)
+   {
+      *xy1 = MAC16_16(*xy1, x[i], y01[i]);
+      *xy2 = MAC16_16(*xy2, x[i], y02[i]);
+   }
+}
+
+#define OVERRIDE_COMB_FILTER_CONST
+static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   int i;
+   __m128 x0v;
+   __m128 g10v, g11v, g12v;
+   g10v = _mm_load1_ps(&g10);
+   g11v = _mm_load1_ps(&g11);
+   g12v = _mm_load1_ps(&g12);
+   x0v = _mm_loadu_ps(&x[-T-2]);
+   for (i=0;i<N-3;i+=4)
+   {
+      __m128 yi, yi2, x1v, x2v, x3v, x4v;
+      const opus_val32 *xp = &x[i-T-2];
+      yi = _mm_loadu_ps(x+i);
+      x4v = _mm_loadu_ps(xp+4);
+#if 0
+      /* Slower version with all loads */
+      x1v = _mm_loadu_ps(xp+1);
+      x2v = _mm_loadu_ps(xp+2);
+      x3v = _mm_loadu_ps(xp+3);
+#else
+      x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
+      x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
+      x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
+#endif
+
+      yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
+#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
+      yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
+      yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+#else
+      /* Use partial sums */
+      yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
+                       _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
+      yi = _mm_add_ps(yi, yi2);
+#endif
+      x0v=x4v;
+      _mm_storeu_ps(y+i, yi);
+   }
+#ifdef CUSTOM_MODES
+   for (;i<N;i++)
+   {
+      y[i] = x[i]
+               + MULT16_32_Q15(g10,x[i-T])
+               + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
+               + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
+   }
+#endif
+}
+
+#endif
diff --git a/drivers/opus/http.c b/drivers/opus/http.c
new file mode 100644
index 0000000000..803db044af
--- /dev/null
+++ b/drivers/opus/http.c
@@ -0,0 +1,3391 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+
+/*RFCs referenced in this file:
+  RFC  761: DOD Standard Transmission Control Protocol
+  RFC 1535: A Security Problem and Proposed Correction With Widely Deployed DNS
+   Software
+  RFC 1738: Uniform Resource Locators (URL)
+  RFC 1945: Hypertext Transfer Protocol -- HTTP/1.0
+  RFC 2068: Hypertext Transfer Protocol -- HTTP/1.1
+  RFC 2145: Use and Interpretation of HTTP Version Numbers
+  RFC 2246: The TLS Protocol Version 1.0
+  RFC 2459: Internet X.509 Public Key Infrastructure Certificate and
+   Certificate Revocation List (CRL) Profile
+  RFC 2616: Hypertext Transfer Protocol -- HTTP/1.1
+  RFC 2617: HTTP Authentication: Basic and Digest Access Authentication
+  RFC 2817: Upgrading to TLS Within HTTP/1.1
+  RFC 2818: HTTP Over TLS
+  RFC 3492: Punycode: A Bootstring encoding of Unicode for Internationalized
+   Domain Names in Applications (IDNA)
+  RFC 3986: Uniform Resource Identifier (URI): Generic Syntax
+  RFC 3987: Internationalized Resource Identifiers (IRIs)
+  RFC 4343: Domain Name System (DNS) Case Insensitivity Clarification
+  RFC 5894: Internationalized Domain Names for Applications (IDNA):
+   Background, Explanation, and Rationale
+  RFC 6066: Transport Layer Security (TLS) Extensions: Extension Definitions
+  RFC 6125: Representation and Verification of Domain-Based Application Service
+   Identity within Internet Public Key Infrastructure Using X.509 (PKIX)
+   Certificates in the Context of Transport Layer Security (TLS)
+  RFC 6555: Happy Eyeballs: Success with Dual-Stack Hosts*/
+
+typedef struct OpusParsedURL   OpusParsedURL;
+typedef struct OpusStringBuf   OpusStringBuf;
+typedef struct OpusHTTPConn    OpusHTTPConn;
+typedef struct OpusHTTPStream  OpusHTTPStream;
+
+static char *op_string_range_dup(const char *_start,const char *_end){
+  size_t  len;
+  char   *ret;
+  OP_ASSERT(_start<=_end);
+  len=_end-_start;
+  /*This is to help avoid overflow elsewhere, later.*/
+  if(OP_UNLIKELY(len>=INT_MAX))return NULL;
+  ret=(char *)_ogg_malloc(sizeof(*ret)*(len+1));
+  if(OP_LIKELY(ret!=NULL)){
+    ret=(char *)memcpy(ret,_start,sizeof(*ret)*(len));
+    ret[len]='\0';
+  }
+  return ret;
+}
+
+static char *op_string_dup(const char *_s){
+  return op_string_range_dup(_s,_s+strlen(_s));
+}
+
+static char *op_string_tolower(char *_s){
+  int i;
+  for(i=0;_s[i]!='\0';i++){
+    int c;
+    c=_s[i];
+    if(c>='A'&&c<='Z')c+='a'-'A';
+    _s[i]=(char)c;
+  }
+  return _s;
+}
+
+/*URI character classes (from RFC 3986).*/
+#define OP_URL_ALPHA \
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+#define OP_URL_DIGIT       "0123456789"
+#define OP_URL_HEXDIGIT    "0123456789ABCDEFabcdef"
+/*Not a character class, but the characters allowed in <scheme>.*/
+#define OP_URL_SCHEME      OP_URL_ALPHA OP_URL_DIGIT "+-."
+#define OP_URL_GEN_DELIMS  "#/:?@[]"
+#define OP_URL_SUB_DELIMS  "!$&'()*+,;="
+#define OP_URL_RESERVED    OP_URL_GEN_DELIMS OP_URL_SUB_DELIMS
+#define OP_URL_UNRESERVED  OP_URL_ALPHA OP_URL_DIGIT "-._~"
+/*Not a character class, but the characters allowed in <pct-encoded>.*/
+#define OP_URL_PCT_ENCODED "%"
+/*Not a character class or production rule, but for convenience.*/
+#define OP_URL_PCHAR_BASE \
+ OP_URL_UNRESERVED OP_URL_PCT_ENCODED OP_URL_SUB_DELIMS
+#define OP_URL_PCHAR       OP_URL_PCHAR_BASE ":@"
+/*Not a character class, but the characters allowed in <userinfo> and
+   <IP-literal>.*/
+#define OP_URL_PCHAR_NA    OP_URL_PCHAR_BASE ":"
+/*Not a character class, but the characters allowed in <segment-nz-nc>.*/
+#define OP_URL_PCHAR_NC    OP_URL_PCHAR_BASE "@"
+/*Not a character clsss, but the characters allowed in <path>.*/
+#define OP_URL_PATH        OP_URL_PCHAR "/"
+/*Not a character class, but the characters allowed in <query> / <fragment>.*/
+#define OP_URL_QUERY_FRAG  OP_URL_PCHAR "/?"
+
+/*Check the <% HEXDIG HEXDIG> escapes of a URL for validity.
+  Return: 0 if valid, or a negative value on failure.*/
+static int op_validate_url_escapes(const char *_s){
+  int i;
+  for(i=0;_s[i];i++){
+    if(_s[i]=='%'){
+      if(OP_UNLIKELY(!isxdigit(_s[i+1]))
+       ||OP_UNLIKELY(!isxdigit(_s[i+2]))
+       /*RFC 3986 says %00 "should be rejected if the application is not
+          expecting to receive raw data within a component."*/
+       ||OP_UNLIKELY(_s[i+1]=='0'&&_s[i+2]=='0')){
+        return OP_FALSE;
+      }
+      i+=2;
+    }
+  }
+  return 0;
+}
+
+/*Convert a hex digit to its actual value.
+  _c: The hex digit to convert.
+      Presumed to be valid ('0'...'9', 'A'...'F', or 'a'...'f').
+  Return: The value of the digit, in the range [0,15].*/
+static int op_hex_value(int _c){
+  return _c>='a'?_c-'a'+10:_c>='A'?_c-'A'+10:_c-'0';
+}
+
+/*Unescape all the <% HEXDIG HEXDIG> sequences in a string in-place.
+  This does no validity checking.*/
+static char *op_unescape_url_component(char *_s){
+  int i;
+  int j;
+  for(i=j=0;_s[i];i++,j++){
+    if(_s[i]=='%'){
+      _s[i]=(char)(op_hex_value(_s[i+1])<<4|op_hex_value(_s[i+2]));
+      i+=2;
+    }
+  }
+  return _s;
+}
+
+/*Parse a file: URL.
+  This code is not meant to be fast: strspn() with large sets is likely to be
+   slow, but it is very convenient.
+  It is meant to be RFC 1738-compliant (as updated by RFC 3986).*/
+static const char *op_parse_file_url(const char *_src){
+  const char *scheme_end;
+  const char *path;
+  const char *path_end;
+  scheme_end=_src+strspn(_src,OP_URL_SCHEME);
+  if(OP_UNLIKELY(*scheme_end!=':')
+   ||scheme_end-_src!=4||op_strncasecmp(_src,"file",4)!=0){
+    /*Unsupported protocol.*/
+    return NULL;
+  }
+  /*Make sure all escape sequences are valid to simplify unescaping later.*/
+  if(OP_UNLIKELY(op_validate_url_escapes(scheme_end+1)<0))return NULL;
+  if(scheme_end[1]=='/'&&scheme_end[2]=='/'){
+    const char *host;
+    /*file: URLs can have a host!
+      Yeah, I was surprised, too, but that's what RFC 1738 says.
+      It also says, "The file URL scheme is unusual in that it does not specify
+       an Internet protocol or access method for such files; as such, its
+       utility in network protocols between hosts is limited," which is a mild
+       understatement.*/
+    host=scheme_end+3;
+    /*The empty host is what we expect.*/
+    if(OP_LIKELY(*host=='/'))path=host;
+    else{
+      const char *host_end;
+      char        host_buf[28];
+      /*RFC 1738 says localhost "is interpreted as `the machine from which the
+         URL is being interpreted,'" so let's check for it.*/
+      host_end=host+strspn(host,OP_URL_PCHAR_BASE);
+      /*No <port> allowed.
+        This also rejects IP-Literals.*/
+      if(*host_end!='/')return NULL;
+      /*An escaped "localhost" can take at most 27 characters.*/
+      if(OP_UNLIKELY(host_end-host>27))return NULL;
+      memcpy(host_buf,host,sizeof(*host_buf)*(host_end-host));
+      host_buf[host_end-host]='\0';
+      op_unescape_url_component(host_buf);
+      op_string_tolower(host_buf);
+      /*Some other host: give up.*/
+      if(OP_UNLIKELY(strcmp(host_buf,"localhost")!=0))return NULL;
+      path=host_end;
+    }
+  }
+  else path=scheme_end+1;
+  path_end=path+strspn(path,OP_URL_PATH);
+  /*This will reject a <query> or <fragment> component, too.
+    I don't know what to do with queries, but a temporal fragment would at
+     least make sense.
+    RFC 1738 pretty clearly defines a <searchpart> that's equivalent to the
+     RFC 3986 <query> component for other schemes, but not the file: scheme,
+     so I'm going to just reject it.*/
+  if(*path_end!='\0')return NULL;
+  return path;
+}
+
+#if defined(OP_ENABLE_HTTP)
+# if defined(_WIN32)
+#  include <winsock2.h>
+#  include <ws2tcpip.h>
+#  include <openssl/ssl.h>
+#  include "winerrno.h"
+
+typedef SOCKET op_sock;
+
+#  define OP_INVALID_SOCKET (INVALID_SOCKET)
+
+/*Vista and later support WSAPoll(), but we don't want to rely on that.
+  Instead we re-implement it badly using select().
+  Unfortunately, they define a conflicting struct pollfd, so we only define our
+   own if it looks like that one has not already been defined.*/
+#  if !defined(POLLIN)
+/*Equivalent to POLLIN.*/
+#   define POLLRDNORM (0x0100)
+/*Priority band data can be read.*/
+#   define POLLRDBAND (0x0200)
+/*There is data to read.*/
+#   define POLLIN     (POLLRDNORM|POLLRDBAND)
+/* There is urgent data to read.*/
+#   define POLLPRI    (0x0400)
+/*Equivalent to POLLOUT.*/
+#   define POLLWRNORM (0x0010)
+/*Writing now will not block.*/
+#   define POLLOUT    (POLLWRNORM)
+/*Priority data may be written.*/
+#   define POLLWRBAND (0x0020)
+/*Error condition (output only).*/
+#   define POLLERR    (0x0001)
+/*Hang up (output only).*/
+#   define POLLHUP    (0x0002)
+/*Invalid request: fd not open (output only).*/
+#   define POLLNVAL   (0x0004)
+
+struct pollfd{
+  /*File descriptor.*/
+  op_sock fd;
+  /*Requested events.*/
+  short   events;
+  /*Returned events.*/
+  short   revents;
+};
+#  endif
+
+/*But Winsock never defines nfds_t (it's simply hard-coded to ULONG).*/
+typedef unsigned long nfds_t;
+
+/*The usage of FD_SET() below is O(N^2).
+  This is okay because select() is limited to 64 sockets in Winsock, anyway.
+  In practice, we only ever call it with one or two sockets.*/
+static int op_poll_win32(struct pollfd *_fds,nfds_t _nfds,int _timeout){
+  struct timeval tv;
+  fd_set         ifds;
+  fd_set         ofds;
+  fd_set         efds;
+  nfds_t         i;
+  int            ret;
+  FD_ZERO(&ifds);
+  FD_ZERO(&ofds);
+  FD_ZERO(&efds);
+  for(i=0;i<_nfds;i++){
+    _fds[i].revents=0;
+    if(_fds[i].events&POLLIN)FD_SET(_fds[i].fd,&ifds);
+    if(_fds[i].events&POLLOUT)FD_SET(_fds[i].fd,&ofds);
+    FD_SET(_fds[i].fd,&efds);
+  }
+  if(_timeout>=0){
+    tv.tv_sec=_timeout/1000;
+    tv.tv_usec=(_timeout%1000)*1000;
+  }
+  ret=select(-1,&ifds,&ofds,&efds,_timeout<0?NULL:&tv);
+  if(ret>0){
+    for(i=0;i<_nfds;i++){
+      if(FD_ISSET(_fds[i].fd,&ifds))_fds[i].revents|=POLLIN;
+      if(FD_ISSET(_fds[i].fd,&ofds))_fds[i].revents|=POLLOUT;
+      /*This isn't correct: there are several different things that might have
+         happened to a fd in efds, but I don't know a good way to distinguish
+         them without more context from the caller.
+        It's okay, because we don't actually check any of these bits, we just
+         need _some_ bit set.*/
+      if(FD_ISSET(_fds[i].fd,&efds))_fds[i].revents|=POLLHUP;
+    }
+  }
+  return ret;
+}
+
+/*We define op_errno() to make it clear that it's not an l-value like normal
+   errno is.*/
+#  define op_errno() (WSAGetLastError()?WSAGetLastError()-WSABASEERR:0)
+#  define op_reset_errno() (WSASetLastError(0))
+
+/*The remaining functions don't get an op_ prefix even though they only
+   operate on sockets, because we don't use non-socket I/O here, and this
+   minimizes the changes needed to deal with Winsock.*/
+#  define close(_fd) closesocket(_fd)
+/*This relies on sizeof(u_long)==sizeof(int), which is always true on both
+   Win32 and Win64.*/
+#  define ioctl(_fd,_req,_arg) ioctlsocket(_fd,_req,(u_long *)(_arg))
+#  define getsockopt(_fd,_level,_name,_val,_len) \
+ getsockopt(_fd,_level,_name,(char *)(_val),_len)
+#  define setsockopt(_fd,_level,_name,_val,_len) \
+ setsockopt(_fd,_level,_name,(const char *)(_val),_len)
+#  define poll(_fds,_nfds,_timeout) op_poll_win32(_fds,_nfds,_timeout)
+
+#  if defined(_MSC_VER)
+typedef ptrdiff_t ssize_t;
+#  endif
+
+/*Load certificates from the built-in certificate store.*/
+int SSL_CTX_set_default_verify_paths_win32(SSL_CTX *_ssl_ctx);
+#  define SSL_CTX_set_default_verify_paths \
+ SSL_CTX_set_default_verify_paths_win32
+
+# else
+/*Normal Berkeley sockets.*/
+#  include <sys/ioctl.h>
+#  include <sys/types.h>
+#  include <sys/socket.h>
+#  include <arpa/inet.h>
+#  include <netinet/in.h>
+#  include <netinet/tcp.h>
+#  include <fcntl.h>
+#  include <netdb.h>
+#  include <poll.h>
+#  include <unistd.h>
+#  include <openssl/ssl.h>
+
+typedef int op_sock;
+
+#  define OP_INVALID_SOCKET (-1)
+
+#  define op_errno() (errno)
+#  define op_reset_errno() (errno=0)
+
+# endif
+# include <sys/timeb.h>
+# include <openssl/x509v3.h>
+
+/*The maximum number of simultaneous connections.
+  RFC 2616 says this SHOULD NOT be more than 2, but everyone on the modern web
+   ignores that (e.g., IE 8 bumped theirs up from 2 to 6, Firefox uses 15).
+  If it makes you feel better, we'll only ever actively read from one of these
+   at a time.
+  The others are kept around mainly to avoid slow-starting a new connection
+   when seeking, and time out rapidly.*/
+# define OP_NCONNS_MAX (4)
+
+/*The amount of time before we attempt to re-resolve the host.
+  This is 10 minutes, as recommended in RFC 6555 for expiring cached connection
+   results for dual-stack hosts.*/
+# define OP_RESOLVE_CACHE_TIMEOUT_MS (10*60*(opus_int32)1000)
+
+/*The number of redirections at which we give up.
+  The value here is the current default in Firefox.
+  RFC 2068 mandated a maximum of 5, but RFC 2616 relaxed that to "a client
+   SHOULD detect infinite redirection loops."
+  Fortunately, 20 is less than infinity.*/
+# define OP_REDIRECT_LIMIT (20)
+
+/*The initial size of the buffer used to read a response message (before the
+   body).*/
+# define OP_RESPONSE_SIZE_MIN (510)
+/*The maximum size of a response message (before the body).
+  Responses larger than this will be discarded.
+  I've seen a real server return 20 kB of data for a 302 Found response.
+  Increasing this beyond 32kB will cause problems on platforms with a 16-bit
+   int.*/
+# define OP_RESPONSE_SIZE_MAX (32766)
+
+/*The number of milliseconds we will allow a connection to sit idle before we
+   refuse to resurrect it.
+  Apache as of 2.2 has reduced its default timeout to 5 seconds (from 15), so
+   that's what we'll use here.*/
+# define OP_CONNECTION_IDLE_TIMEOUT_MS (5*1000)
+
+/*The number of milliseconds we will wait to send or receive data before giving
+   up.*/
+# define OP_POLL_TIMEOUT_MS (30*1000)
+
+/*We will always attempt to read ahead at least this much in preference to
+   opening a new connection.*/
+# define OP_READAHEAD_THRESH_MIN (32*(opus_int32)1024)
+
+/*The amount of data to request after a seek.
+  This is a trade-off between read throughput after a seek vs. the the ability
+   to quickly perform another seek with the same connection.*/
+# define OP_PIPELINE_CHUNK_SIZE     (32*(opus_int32)1024)
+/*Subsequent chunks are requested with larger and larger sizes until they pass
+   this threshold, after which we just ask for the rest of the resource.*/
+# define OP_PIPELINE_CHUNK_SIZE_MAX (1024*(opus_int32)1024)
+/*This is the maximum number of requests we'll make with a single connection.
+  Many servers will simply disconnect after we attempt some number of requests,
+   possibly without sending a Connection: close header, meaning we won't
+   discover it until we try to read beyond the end of the current chunk.
+  We can reconnect when that happens, but this is slow.
+  Instead, we impose a limit ourselves (set to the default for Apache
+   installations and thus likely the most common value in use).*/
+# define OP_PIPELINE_MAX_REQUESTS   (100)
+/*This should be the number of requests, starting from a chunk size of
+   OP_PIPELINE_CHUNK_SIZE and doubling each time, until we exceed
+   OP_PIPELINE_CHUNK_SIZE_MAX and just request the rest of the file.
+  We won't reuse a connection when seeking unless it has at least this many
+   requests left, to reduce the chances we'll have to open a new connection
+   while reading forward afterwards.*/
+# define OP_PIPELINE_MIN_REQUESTS   (7)
+
+/*Is this an https URL?
+  For now we can simply check the last letter of the scheme.*/
+# define OP_URL_IS_SSL(_url) ((_url)->scheme[4]=='s')
+
+/*Does this URL use the default port for its scheme?*/
+# define OP_URL_IS_DEFAULT_PORT(_url) \
+ (!OP_URL_IS_SSL(_url)&&(_url)->port==80 \
+ ||OP_URL_IS_SSL(_url)&&(_url)->port==443)
+
+struct OpusParsedURL{
+  /*Either "http" or "https".*/
+  char     *scheme;
+  /*The user name from the <userinfo> component, or NULL.*/
+  char     *user;
+  /*The password from the <userinfo> component, or NULL.*/
+  char     *pass;
+  /*The <host> component.
+    This may not be NULL.*/
+  char     *host;
+  /*The <path> and <query> components.
+    This may not be NULL.*/
+  char     *path;
+  /*The <port> component.
+    This is set to the default port if the URL did not contain one.*/
+  unsigned  port;
+};
+
+/*Parse a URL.
+  This code is not meant to be fast: strspn() with large sets is likely to be
+   slow, but it is very convenient.
+  It is meant to be RFC 3986-compliant.
+  We currently do not support IRIs (Internationalized Resource Identifiers,
+   RFC 3987).
+  Callers should translate them to URIs first.*/
+static int op_parse_url_impl(OpusParsedURL *_dst,const char *_src){
+  const char  *scheme_end;
+  const char  *authority;
+  const char  *userinfo_end;
+  const char  *user;
+  const char  *user_end;
+  const char  *pass;
+  const char  *hostport;
+  const char  *hostport_end;
+  const char  *host_end;
+  const char  *port;
+  opus_int32   port_num;
+  const char  *port_end;
+  const char  *path;
+  const char  *path_end;
+  const char  *uri_end;
+  scheme_end=_src+strspn(_src,OP_URL_SCHEME);
+  if(OP_UNLIKELY(*scheme_end!=':')
+   ||OP_UNLIKELY(scheme_end-_src<4)||OP_UNLIKELY(scheme_end-_src>5)
+   ||OP_UNLIKELY(op_strncasecmp(_src,"https",scheme_end-_src)!=0)){
+    /*Unsupported protocol.*/
+    return OP_EIMPL;
+  }
+  if(OP_UNLIKELY(scheme_end[1]!='/')||OP_UNLIKELY(scheme_end[2]!='/')){
+    /*We require an <authority> component.*/
+    return OP_EINVAL;
+  }
+  authority=scheme_end+3;
+  /*Make sure all escape sequences are valid to simplify unescaping later.*/
+  if(OP_UNLIKELY(op_validate_url_escapes(authority)<0))return OP_EINVAL;
+  /*Look for a <userinfo> component.*/
+  userinfo_end=authority+strspn(authority,OP_URL_PCHAR_NA);
+  if(*userinfo_end=='@'){
+    /*Found one.*/
+    user=authority;
+    /*Look for a password (yes, clear-text passwords are deprecated, I know,
+       but what else are people supposed to use? use SSL if you care).*/
+    user_end=authority+strspn(authority,OP_URL_PCHAR_BASE);
+    if(*user_end==':')pass=user_end+1;
+    else pass=NULL;
+    hostport=userinfo_end+1;
+  }
+  else{
+    /*We shouldn't have to initialize user_end, but gcc is too dumb to figure
+       out that user!=NULL below means we didn't take this else branch.*/
+    user=user_end=NULL;
+    pass=NULL;
+    hostport=authority;
+  }
+  /*Try to figure out where the <host> component ends.*/
+  if(hostport[0]=='['){
+    hostport++;
+    /*We have an <IP-literal>, which can contain colons.*/
+    hostport_end=host_end=hostport+strspn(hostport,OP_URL_PCHAR_NA);
+    if(OP_UNLIKELY(*hostport_end++!=']'))return OP_EINVAL;
+  }
+  /*Currently we don't support IDNA (RFC 5894), because I don't want to deal
+     with the policy about which domains should not be internationalized to
+     avoid confusing similarities.
+    Give this API Punycode (RFC 3492) domain names instead.*/
+  else hostport_end=host_end=hostport+strspn(hostport,OP_URL_PCHAR_BASE);
+  /*TODO: Validate host.*/
+  /*Is there a port number?*/
+  port_num=-1;
+  if(*hostport_end==':'){
+    int i;
+    port=hostport_end+1;
+    port_end=port+strspn(port,OP_URL_DIGIT);
+    path=port_end;
+    /*Not part of RFC 3986, but require port numbers in the range 0...65535.*/
+    if(OP_LIKELY(port_end-port>0)){
+      while(*port=='0')port++;
+      if(OP_UNLIKELY(port_end-port>5))return OP_EINVAL;
+      port_num=0;
+      for(i=0;i<port_end-port;i++)port_num=port_num*10+port[i]-'0';
+      if(OP_UNLIKELY(port_num>65535))return OP_EINVAL;
+    }
+  }
+  else path=hostport_end;
+  path_end=path+strspn(path,OP_URL_PATH);
+  /*If the path is not empty, it must begin with a '/'.*/
+  if(OP_LIKELY(path_end>path)&&OP_UNLIKELY(path[0]!='/'))return OP_EINVAL;
+  /*Consume the <query> component, if any (right now we don't split this out
+     from the <path> component).*/
+  if(*path_end=='?')path_end=path_end+strspn(path_end,OP_URL_QUERY_FRAG);
+  /*Discard the <fragment> component, if any.
+    This doesn't get sent to the server.
+    Some day we should add support for Media Fragment URIs
+     <http://www.w3.org/TR/media-frags/>.*/
+  if(*path_end=='#')uri_end=path_end+1+strspn(path_end+1,OP_URL_QUERY_FRAG);
+  else uri_end=path_end;
+  /*If there's anything left, this was not a valid URL.*/
+  if(OP_UNLIKELY(*uri_end!='\0'))return OP_EINVAL;
+  _dst->scheme=op_string_range_dup(_src,scheme_end);
+  if(OP_UNLIKELY(_dst->scheme==NULL))return OP_EFAULT;
+  op_string_tolower(_dst->scheme);
+  if(user!=NULL){
+    _dst->user=op_string_range_dup(user,user_end);
+    if(OP_UNLIKELY(_dst->user==NULL))return OP_EFAULT;
+    op_unescape_url_component(_dst->user);
+    /*Unescaping might have created a ':' in the username.
+      That's not allowed by RFC 2617's Basic Authentication Scheme.*/
+    if(OP_UNLIKELY(strchr(_dst->user,':')!=NULL))return OP_EINVAL;
+  }
+  else _dst->user=NULL;
+  if(pass!=NULL){
+    _dst->pass=op_string_range_dup(pass,userinfo_end);
+    if(OP_UNLIKELY(_dst->pass==NULL))return OP_EFAULT;
+    op_unescape_url_component(_dst->pass);
+  }
+  else _dst->pass=NULL;
+  _dst->host=op_string_range_dup(hostport,host_end);
+  if(OP_UNLIKELY(_dst->host==NULL))return OP_EFAULT;
+  if(port_num<0){
+    if(_src[4]=='s')port_num=443;
+    else port_num=80;
+  }
+  _dst->port=(unsigned)port_num;
+  /*RFC 2616 says an empty <abs-path> component is equivalent to "/", and we
+     MUST use the latter in the Request-URI.
+    Reserve space for the slash here.*/
+  if(path==path_end||path[0]=='?')path--;
+  _dst->path=op_string_range_dup(path,path_end);
+  if(OP_UNLIKELY(_dst->path==NULL))return OP_EFAULT;
+  /*And force-set it here.*/
+  _dst->path[0]='/';
+  return 0;
+}
+
+static void op_parsed_url_init(OpusParsedURL *_url){
+  memset(_url,0,sizeof(*_url));
+}
+
+static void op_parsed_url_clear(OpusParsedURL *_url){
+  _ogg_free(_url->scheme);
+  _ogg_free(_url->user);
+  _ogg_free(_url->pass);
+  _ogg_free(_url->host);
+  _ogg_free(_url->path);
+}
+
+static int op_parse_url(OpusParsedURL *_dst,const char *_src){
+  OpusParsedURL url;
+  int           ret;
+  op_parsed_url_init(&url);
+  ret=op_parse_url_impl(&url,_src);
+  if(OP_UNLIKELY(ret<0))op_parsed_url_clear(&url);
+  else *_dst=*&url;
+  return ret;
+}
+
+/*A buffer to hold growing strings.
+  The main purpose of this is to consolidate allocation checks and simplify
+   cleanup on a failed allocation.*/
+struct OpusStringBuf{
+  char *buf;
+  int   nbuf;
+  int   cbuf;
+};
+
+static void op_sb_init(OpusStringBuf *_sb){
+  _sb->buf=NULL;
+  _sb->nbuf=0;
+  _sb->cbuf=0;
+}
+
+static void op_sb_clear(OpusStringBuf *_sb){
+  _ogg_free(_sb->buf);
+}
+
+/*Make sure we have room for at least _capacity characters (plus 1 more for the
+   terminating NUL).*/
+static int op_sb_ensure_capacity(OpusStringBuf *_sb,int _capacity){
+  char *buf;
+  int   cbuf;
+  buf=_sb->buf;
+  cbuf=_sb->cbuf;
+  if(_capacity>=cbuf-1){
+    if(OP_UNLIKELY(cbuf>INT_MAX-1>>1))return OP_EFAULT;
+    if(OP_UNLIKELY(_capacity>=INT_MAX-1))return OP_EFAULT;
+    cbuf=OP_MAX(2*cbuf+1,_capacity+1);
+    buf=_ogg_realloc(buf,sizeof(*buf)*cbuf);
+    if(OP_UNLIKELY(buf==NULL))return OP_EFAULT;
+    _sb->buf=buf;
+    _sb->cbuf=cbuf;
+  }
+  return 0;
+}
+
+/*Increase the capacity of the buffer, but not to more than _max_size
+   characters (plus 1 more for the terminating NUL).*/
+static int op_sb_grow(OpusStringBuf *_sb,int _max_size){
+  char *buf;
+  int   cbuf;
+  buf=_sb->buf;
+  cbuf=_sb->cbuf;
+  OP_ASSERT(_max_size<=INT_MAX-1);
+  cbuf=cbuf<=_max_size-1>>1?2*cbuf+1:_max_size+1;
+  buf=_ogg_realloc(buf,sizeof(*buf)*cbuf);
+  if(OP_UNLIKELY(buf==NULL))return OP_EFAULT;
+  _sb->buf=buf;
+  _sb->cbuf=cbuf;
+  return 0;
+}
+
+static int op_sb_append(OpusStringBuf *_sb,const char *_s,int _len){
+  char *buf;
+  int   nbuf;
+  int   ret;
+  nbuf=_sb->nbuf;
+  if(OP_UNLIKELY(nbuf>INT_MAX-_len))return OP_EFAULT;
+  ret=op_sb_ensure_capacity(_sb,nbuf+_len);
+  if(OP_UNLIKELY(ret<0))return ret;
+  buf=_sb->buf;
+  memcpy(buf+nbuf,_s,sizeof(*buf)*_len);
+  nbuf+=_len;
+  buf[nbuf]='\0';
+  _sb->nbuf=nbuf;
+  return 0;
+}
+
+static int op_sb_append_string(OpusStringBuf *_sb,const char *_s){
+  return op_sb_append(_sb,_s,strlen(_s));
+}
+
+static int op_sb_append_port(OpusStringBuf *_sb,unsigned _port){
+  char port_buf[7];
+  OP_ASSERT(_port<=65535U);
+  sprintf(port_buf,":%u",_port);
+  return op_sb_append_string(_sb,port_buf);
+}
+
+static int op_sb_append_nonnegative_int64(OpusStringBuf *_sb,opus_int64 _i){
+  char digit;
+  int  nbuf_start;
+  int  ret;
+  OP_ASSERT(_i>=0);
+  nbuf_start=_sb->nbuf;
+  ret=0;
+  do{
+    digit='0'+_i%10;
+    ret|=op_sb_append(_sb,&digit,1);
+    _i/=10;
+  }
+  while(_i>0);
+  if(OP_LIKELY(ret>=0)){
+    char *buf;
+    int   nbuf_end;
+    buf=_sb->buf;
+    nbuf_end=_sb->nbuf-1;
+    /*We've added the digits backwards.
+      Reverse them.*/
+    while(nbuf_start<nbuf_end){
+      digit=buf[nbuf_start];
+      buf[nbuf_start]=buf[nbuf_end];
+      buf[nbuf_end]=digit;
+      nbuf_start++;
+      nbuf_end--;
+    }
+  }
+  return ret;
+}
+
+static struct addrinfo *op_resolve(const char *_host,unsigned _port){
+  struct addrinfo *addrs;
+  struct addrinfo  hints;
+  char             service[6];
+  memset(&hints,0,sizeof(hints));
+  hints.ai_socktype=SOCK_STREAM;
+#if !defined(_WIN32)
+  hints.ai_flags=AI_NUMERICSERV;
+#endif
+  OP_ASSERT(_port<=65535U);
+  sprintf(service,"%u",_port);
+  if(OP_LIKELY(!getaddrinfo(_host,service,&hints,&addrs)))return addrs;
+  return NULL;
+}
+
+static int op_sock_set_nonblocking(op_sock _fd,int _nonblocking){
+#if !defined(_WIN32)
+  int flags;
+  flags=fcntl(_fd,F_GETFL);
+  if(OP_UNLIKELY(flags<0))return flags;
+  if(_nonblocking)flags|=O_NONBLOCK;
+  else flags&=~O_NONBLOCK;
+  return fcntl(_fd,F_SETFL,flags);
+#else
+  return ioctl(_fd,FIONBIO,&_nonblocking);
+#endif
+}
+
+/*Disable/enable write coalescing if we can.
+  We always send whole requests at once and always parse the response headers
+   before sending another one, so normally write coalescing just causes added
+   delay.*/
+static void op_sock_set_tcp_nodelay(op_sock _fd,int _nodelay){
+# if defined(TCP_NODELAY)&&(defined(IPPROTO_TCP)||defined(SOL_TCP))
+#  if defined(IPPROTO_TCP)
+#   define OP_SO_LEVEL IPPROTO_TCP
+#  else
+#   define OP_SO_LEVEL SOL_TCP
+#  endif
+  /*It doesn't really matter if this call fails, but it would be interesting
+     to hit a case where it does.*/
+  OP_ALWAYS_TRUE(!setsockopt(_fd,OP_SO_LEVEL,TCP_NODELAY,
+   &_nodelay,sizeof(_nodelay)));
+# endif
+}
+
+#if defined(_WIN32)
+static void op_init_winsock(){
+  static LONG    count;
+  static WSADATA wsadata;
+  if(InterlockedIncrement(&count)==1)WSAStartup(0x0202,&wsadata);
+}
+#endif
+
+/*A single physical connection to an HTTP server.
+  We may have several of these open at once.*/
+struct OpusHTTPConn{
+  /*The current position indicator for this connection.*/
+  opus_int64    pos;
+  /*The position where the current request will end, or -1 if we're reading
+     until EOF (an unseekable stream or the initial HTTP/1.0 request).*/
+  opus_int64    end_pos;
+  /*The position where next request we've sent will start, or -1 if we haven't
+     sent the next request yet.*/
+  opus_int64    next_pos;
+  /*The end of the next request or -1 if we requested the rest of the resource.
+    This is only set to a meaningful value if next_pos is not -1.*/
+  opus_int64    next_end;
+  /*The SSL connection, if this is https.*/
+  SSL          *ssl_conn;
+  /*The next connection in either the LRU or free list.*/
+  OpusHTTPConn *next;
+  /*The last time we blocked for reading from this connection.*/
+  struct timeb  read_time;
+  /*The number of bytes we've read since the last time we blocked.*/
+  opus_int64    read_bytes;
+  /*The estimated throughput of this connection, in bytes/s.*/
+  opus_int64    read_rate;
+  /*The socket we're reading from.*/
+  op_sock       fd;
+  /*The number of remaining requests we are allowed on this connection.*/
+  int           nrequests_left;
+  /*The chunk size to use for pipelining requests.*/
+  opus_int32    chunk_size;
+};
+
+static void op_http_conn_init(OpusHTTPConn *_conn){
+  _conn->next_pos=-1;
+  _conn->ssl_conn=NULL;
+  _conn->next=NULL;
+  _conn->fd=OP_INVALID_SOCKET;
+}
+
+static void op_http_conn_clear(OpusHTTPConn *_conn){
+  if(_conn->ssl_conn!=NULL)SSL_free(_conn->ssl_conn);
+  /*SSL frees the BIO for us.*/
+  if(_conn->fd!=OP_INVALID_SOCKET)close(_conn->fd);
+}
+
+/*The global stream state.*/
+struct OpusHTTPStream{
+  /*The list of connections.*/
+  OpusHTTPConn     conns[OP_NCONNS_MAX];
+  /*The context object used as a framework for TLS/SSL functions.*/
+  SSL_CTX         *ssl_ctx;
+  /*The cached session to reuse for future connections.*/
+  SSL_SESSION     *ssl_session;
+  /*The LRU list (ordered from MRU to LRU) of currently connected
+     connections.*/
+  OpusHTTPConn    *lru_head;
+  /*The free list.*/
+  OpusHTTPConn    *free_head;
+  /*The URL to connect to.*/
+  OpusParsedURL    url;
+  /*Information about the address we connected to.*/
+  struct addrinfo  addr_info;
+  /*The address we connected to.*/
+  union{
+    struct sockaddr     s;
+    struct sockaddr_in  v4;
+    struct sockaddr_in6 v6;
+  }                addr;
+  /*The last time we re-resolved the host.*/
+  struct timeb     resolve_time;
+  /*A buffer used to build HTTP requests.*/
+  OpusStringBuf    request;
+  /*A buffer used to build proxy CONNECT requests.*/
+  OpusStringBuf    proxy_connect;
+  /*A buffer used to receive the response headers.*/
+  OpusStringBuf    response;
+  /*The Content-Length, if specified, or -1 otherwise.
+    This will always be specified for seekable streams.*/
+  opus_int64       content_length;
+  /*The position indicator used when no connection is active.*/
+  opus_int64       pos;
+  /*The host we actually connected to.*/
+  char            *connect_host;
+  /*The port we actually connected to.*/
+  unsigned         connect_port;
+  /*The connection we're currently reading from.
+    This can be -1 if no connection is active.*/
+  int              cur_conni;
+  /*Whether or not the server supports range requests.*/
+  int              seekable;
+  /*Whether or not the server supports HTTP/1.1 with persistent connections.*/
+  int              pipeline;
+  /*Whether or not we should skip certificate checks.*/
+  int              skip_certificate_check;
+  /*The offset of the tail of the request.
+    Only the offset in the Range: header appears after this, allowing us to
+     quickly edit the request to ask for a new range.*/
+  int              request_tail;
+  /*The estimated time required to open a new connection, in milliseconds.*/
+  opus_int32       connect_rate;
+};
+
+static void op_http_stream_init(OpusHTTPStream *_stream){
+  OpusHTTPConn **pnext;
+  int            ci;
+  pnext=&_stream->free_head;
+  for(ci=0;ci<OP_NCONNS_MAX;ci++){
+    op_http_conn_init(_stream->conns+ci);
+    *pnext=_stream->conns+ci;
+    pnext=&_stream->conns[ci].next;
+  }
+  _stream->ssl_ctx=NULL;
+  _stream->ssl_session=NULL;
+  _stream->lru_head=NULL;
+  op_parsed_url_init(&_stream->url);
+  op_sb_init(&_stream->request);
+  op_sb_init(&_stream->proxy_connect);
+  op_sb_init(&_stream->response);
+  _stream->connect_host=NULL;
+  _stream->seekable=0;
+}
+
+/*Close the connection and move it to the free list.
+  _stream:     The stream containing the free list.
+  _conn:       The connection to close.
+  _penxt:      The linked-list pointer currently pointing to this connection.
+  _gracefully: Whether or not to shut down cleanly.*/
+static void op_http_conn_close(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ OpusHTTPConn **_pnext,int _gracefully){
+  /*If we don't shut down gracefully, the server MUST NOT re-use our session
+     according to RFC 2246, because it can't tell the difference between an
+     abrupt close and a truncation attack.
+    So we shut down gracefully if we can.
+    However, we will not wait if this would block (it's not worth the savings
+     from session resumption to do so).
+    Clients (that's us) MAY resume a TLS session that ended with an incomplete
+     close, according to RFC 2818, so there's no reason to make sure the server
+     shut things down gracefully.*/
+  if(_gracefully&&_conn->ssl_conn!=NULL)SSL_shutdown(_conn->ssl_conn);
+  op_http_conn_clear(_conn);
+  _conn->next_pos=-1;
+  _conn->ssl_conn=NULL;
+  _conn->fd=OP_INVALID_SOCKET;
+  OP_ASSERT(*_pnext==_conn);
+  *_pnext=_conn->next;
+  _conn->next=_stream->free_head;
+  _stream->free_head=_conn;
+}
+
+static void op_http_stream_clear(OpusHTTPStream *_stream){
+  while(_stream->lru_head!=NULL){
+    op_http_conn_close(_stream,_stream->lru_head,&_stream->lru_head,0);
+  }
+  if(_stream->ssl_session!=NULL)SSL_SESSION_free(_stream->ssl_session);
+  if(_stream->ssl_ctx!=NULL)SSL_CTX_free(_stream->ssl_ctx);
+  op_sb_clear(&_stream->response);
+  op_sb_clear(&_stream->proxy_connect);
+  op_sb_clear(&_stream->request);
+  if(_stream->connect_host!=_stream->url.host)_ogg_free(_stream->connect_host);
+  op_parsed_url_clear(&_stream->url);
+}
+
+static int op_http_conn_write_fully(OpusHTTPConn *_conn,
+ const char *_buf,int _buf_size){
+  struct pollfd  fd;
+  SSL           *ssl_conn;
+  fd.fd=_conn->fd;
+  ssl_conn=_conn->ssl_conn;
+  while(_buf_size>0){
+    int err;
+    if(ssl_conn!=NULL){
+      int ret;
+      ret=SSL_write(ssl_conn,_buf,_buf_size);
+      if(ret>0){
+        /*Wrote some data.*/
+        _buf+=ret;
+        _buf_size-=ret;
+        continue;
+      }
+      /*Connection closed.*/
+      else if(ret==0)return OP_FALSE;
+      err=SSL_get_error(ssl_conn,ret);
+      /*Yes, renegotiations can cause SSL_write() to block for reading.*/
+      if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN;
+      else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT;
+      else return OP_FALSE;
+    }
+    else{
+      ssize_t ret;
+      op_reset_errno();
+      ret=send(fd.fd,_buf,_buf_size,0);
+      if(ret>0){
+        _buf+=ret;
+        _buf_size-=ret;
+        continue;
+      }
+      err=op_errno();
+      if(err!=EAGAIN&&err!=EWOULDBLOCK)return OP_FALSE;
+      fd.events=POLLOUT;
+    }
+    if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_FALSE;
+  }
+  return 0;
+}
+
+static int op_http_conn_estimate_available(OpusHTTPConn *_conn){
+  int available;
+  int ret;
+  ret=ioctl(_conn->fd,FIONREAD,&available);
+  if(ret<0)available=0;
+  /*This requires the SSL read_ahead flag to be unset to work.
+    We ignore partial records as well as the protocol overhead for any pending
+     bytes.
+    This means we might return somewhat less than can truly be read without
+     blocking (if there's a partial record).
+    This is okay, because we're using this value to estimate network transfer
+     time, and we _have_ already received those bytes.
+    We also might return slightly more (due to protocol overhead), but that's
+     small enough that it probably doesn't matter.*/
+  if(_conn->ssl_conn!=NULL)available+=SSL_pending(_conn->ssl_conn);
+  return available;
+}
+
+static opus_int32 op_time_diff_ms(const struct timeb *_end,
+ const struct timeb *_start){
+  opus_int64 dtime;
+  dtime=_end->time-(opus_int64)_start->time;
+  OP_ASSERT(_end->millitm<1000);
+  OP_ASSERT(_start->millitm<1000);
+  if(OP_UNLIKELY(dtime>(OP_INT32_MAX-1000)/1000))return OP_INT32_MAX;
+  if(OP_UNLIKELY(dtime<(OP_INT32_MIN+1000)/1000))return OP_INT32_MIN;
+  return (opus_int32)dtime*1000+_end->millitm-_start->millitm;
+}
+
+/*Update the read rate estimate for this connection.*/
+static void op_http_conn_read_rate_update(OpusHTTPConn *_conn){
+  struct timeb read_time;
+  opus_int32   read_delta_ms;
+  opus_int64   read_delta_bytes;
+  opus_int64   read_rate;
+  read_delta_bytes=_conn->read_bytes;
+  if(read_delta_bytes<=0)return;
+  ftime(&read_time);
+  read_delta_ms=op_time_diff_ms(&read_time,&_conn->read_time);
+  read_rate=_conn->read_rate;
+  read_delta_ms=OP_MAX(read_delta_ms,1);
+  read_rate+=read_delta_bytes*1000/read_delta_ms-read_rate+4>>3;
+  *&_conn->read_time=*&read_time;
+  _conn->read_bytes=0;
+  _conn->read_rate=read_rate;
+}
+
+/*Tries to read from the given connection.
+  [out] _buf: Returns the data read.
+  _buf_size:  The size of the buffer.
+  _blocking:  Whether or not to block until some data is retrieved.
+  Return: A positive number of bytes read on success.
+          0:        The read would block, or the connection was closed.
+          OP_EREAD: There was a fatal read error.*/
+static int op_http_conn_read(OpusHTTPConn *_conn,
+ char *_buf,int _buf_size,int _blocking){
+  struct pollfd  fd;
+  SSL           *ssl_conn;
+  int            nread;
+  int            nread_unblocked;
+  fd.fd=_conn->fd;
+  ssl_conn=_conn->ssl_conn;
+  nread=nread_unblocked=0;
+  /*RFC 2818 says "client implementations MUST treat any premature closes as
+     errors and the data received as potentially truncated," so we make very
+     sure to report read errors upwards.*/
+  do{
+    int err;
+    if(ssl_conn!=NULL){
+      int ret;
+      ret=SSL_read(ssl_conn,_buf+nread,_buf_size-nread);
+      OP_ASSERT(ret<=_buf_size-nread);
+      if(ret>0){
+        /*Read some data.
+          Keep going to see if there's more.*/
+        nread+=ret;
+        nread_unblocked+=ret;
+        continue;
+      }
+      /*If we already read some data, return it right now.*/
+      if(nread>0)break;
+      err=SSL_get_error(ssl_conn,ret);
+      if(ret==0){
+        /*Connection close.
+          Check for a clean shutdown to prevent truncation attacks.
+          This check always succeeds for SSLv2, as it has no "close notify"
+           message and thus can't verify an orderly shutdown.*/
+        return err==SSL_ERROR_ZERO_RETURN?0:OP_EREAD;
+      }
+      if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN;
+      /*Yes, renegotiations can cause SSL_read() to block for writing.*/
+      else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT;
+      /*Some other error.*/
+      else return OP_EREAD;
+    }
+    else{
+      ssize_t ret;
+      op_reset_errno();
+      ret=recv(fd.fd,_buf+nread,_buf_size-nread,0);
+      OP_ASSERT(ret<=_buf_size-nread);
+      if(ret>0){
+        /*Read some data.
+          Keep going to see if there's more.*/
+        nread+=ret;
+        nread_unblocked+=ret;
+        continue;
+      }
+      /*If we already read some data or the connection was closed, return
+         right now.*/
+      if(ret==0||nread>0)break;
+      err=op_errno();
+      if(err!=EAGAIN&&err!=EWOULDBLOCK)return OP_EREAD;
+      fd.events=POLLIN;
+    }
+    _conn->read_bytes+=nread_unblocked;
+    op_http_conn_read_rate_update(_conn);
+    nread_unblocked=0;
+    if(!_blocking)break;
+    /*Need to wait to get any data at all.*/
+    if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_EREAD;
+  }
+  while(nread<_buf_size);
+  _conn->read_bytes+=nread_unblocked;
+  return nread;
+}
+
+/*Tries to look at the pending data for a connection without consuming it.
+  [out] _buf: Returns the data at which we're peeking.
+  _buf_size:  The size of the buffer.*/
+static int op_http_conn_peek(OpusHTTPConn *_conn,char *_buf,int _buf_size){
+  struct pollfd   fd;
+  SSL            *ssl_conn;
+  int             ret;
+  fd.fd=_conn->fd;
+  ssl_conn=_conn->ssl_conn;
+  for(;;){
+    int err;
+    if(ssl_conn!=NULL){
+      ret=SSL_peek(ssl_conn,_buf,_buf_size);
+      /*Either saw some data or the connection was closed.*/
+      if(ret>=0)return ret;
+      err=SSL_get_error(ssl_conn,ret);
+      if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN;
+      /*Yes, renegotiations can cause SSL_peek() to block for writing.*/
+      else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT;
+      else return 0;
+    }
+    else{
+      op_reset_errno();
+      ret=(int)recv(fd.fd,_buf,_buf_size,MSG_PEEK);
+      /*Either saw some data or the connection was closed.*/
+      if(ret>=0)return ret;
+      err=op_errno();
+      if(err!=EAGAIN&&err!=EWOULDBLOCK)return 0;
+      fd.events=POLLIN;
+    }
+    /*Need to wait to get any data at all.*/
+    if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return 0;
+  }
+}
+
+/*When parsing response headers, RFC 2616 mandates that all lines end in CR LF.
+  However, even in the year 2012, I have seen broken servers use just a LF.
+  This is the evil that Postel's advice from RFC 761 breeds.*/
+
+/*Reads the entirety of a response to an HTTP request into the response buffer.
+  Actual parsing and validation is done later.
+  Return: The number of bytes in the response on success, OP_EREAD if the
+           connection was closed before reading any data, or another negative
+           value on any other error.*/
+static int op_http_conn_read_response(OpusHTTPConn *_conn,
+ OpusStringBuf *_response){
+  int ret;
+  _response->nbuf=0;
+  ret=op_sb_ensure_capacity(_response,OP_RESPONSE_SIZE_MIN);
+  if(OP_UNLIKELY(ret<0))return ret;
+  for(;;){
+    char *buf;
+    int   size;
+    int   capacity;
+    int   read_limit;
+    int   terminated;
+    size=_response->nbuf;
+    capacity=_response->cbuf-1;
+    if(OP_UNLIKELY(size>=capacity)){
+      ret=op_sb_grow(_response,OP_RESPONSE_SIZE_MAX);
+      if(OP_UNLIKELY(ret<0))return ret;
+      capacity=_response->cbuf-1;
+      /*The response was too large.
+        This prevents a bad server from running us out of memory.*/
+      if(OP_UNLIKELY(size>=capacity))return OP_EIMPL;
+    }
+    buf=_response->buf;
+    ret=op_http_conn_peek(_conn,buf+size,capacity-size);
+    if(OP_UNLIKELY(ret<=0))return size<=0?OP_EREAD:OP_FALSE;
+    /*We read some data.*/
+    /*Make sure the starting characters are "HTTP".
+      Otherwise we could wind up waiting forever for a response from
+       something that is not an HTTP server.*/
+    if(size<4&&op_strncasecmp(buf,"HTTP",OP_MIN(size+ret,4))!=0){
+      return OP_FALSE;
+    }
+    /*How far can we read without passing the "\r\n\r\n" terminator?*/
+    buf[size+ret]='\0';
+    terminated=0;
+    for(read_limit=OP_MAX(size-3,0);read_limit<size+ret;read_limit++){
+      /*We don't look for the leading '\r' thanks to broken servers.*/
+      if(buf[read_limit]=='\n'){
+        if(buf[read_limit+1]=='\r'&&OP_LIKELY(buf[read_limit+2]=='\n')){
+          terminated=3;
+          break;
+        }
+        /*This case is for broken servers.*/
+        else if(OP_UNLIKELY(buf[read_limit+1]=='\n')){
+          terminated=2;
+          break;
+        }
+      }
+    }
+    read_limit+=terminated;
+    OP_ASSERT(size<=read_limit);
+    OP_ASSERT(read_limit<=size+ret);
+    /*Actually consume that data.*/
+    ret=op_http_conn_read(_conn,buf+size,read_limit-size,1);
+    if(OP_UNLIKELY(ret<=0))return OP_FALSE;
+    size+=ret;
+    buf[size]='\0';
+    _response->nbuf=size;
+    /*We found the terminator and read all the data up to and including it.*/
+    if(terminated&&OP_LIKELY(size>=read_limit))return size;
+  }
+  return OP_EIMPL;
+}
+
+# define OP_HTTP_DIGIT "0123456789"
+
+/*The Reason-Phrase is not allowed to contain control characters, except
+   horizontal tab (HT: \011).*/
+# define OP_HTTP_CREASON_PHRASE \
+ "\001\002\003\004\005\006\007\010\012\013\014\015\016\017\020\021" \
+ "\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177"
+
+# define OP_HTTP_CTLS \
+ "\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020" \
+ "\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177"
+
+/*This also includes '\t', but we get that from OP_HTTP_CTLS.*/
+# define OP_HTTP_SEPARATORS " \"(),/:;<=>?@[\\]{}"
+
+/*TEXT can also include LWS, but that has structure, so we parse it
+   separately.*/
+# define OP_HTTP_CTOKEN OP_HTTP_CTLS OP_HTTP_SEPARATORS
+
+/*Return: The amount of linear white space (LWS) at the start of _s.*/
+static int op_http_lwsspn(const char *_s){
+  int i;
+  for(i=0;;){
+    if(_s[0]=='\r'&&_s[1]=='\n'&&(_s[2]=='\t'||_s[2]==' '))i+=3;
+    /*This case is for broken servers.*/
+    else if(_s[0]=='\n'&&(_s[1]=='\t'||_s[1]==' '))i+=2;
+    else if(_s[i]=='\t'||_s[i]==' ')i++;
+    else return i;
+  }
+}
+
+static char *op_http_parse_status_line(int *_v1_1_compat,
+ char **_status_code,char *_response){
+  char   *next;
+  char   *status_code;
+  int     v1_1_compat;
+  size_t  d;
+  /*RFC 2616 Section 6.1 does not say that the tokens in the Status-Line cannot
+     be separated by optional LWS, but since it specifically calls out where
+     spaces are to be placed and that CR and LF are not allowed except at the
+     end, I am assuming this to be true.*/
+  /*We already validated that this starts with "HTTP"*/
+  OP_ASSERT(op_strncasecmp(_response,"HTTP",4)==0);
+  next=_response+4;
+  if(OP_UNLIKELY(*next++!='/'))return NULL;
+  d=strspn(next,OP_HTTP_DIGIT);
+  /*"Leading zeros MUST be ignored by recipients."*/
+  while(*next=='0'){
+    next++;
+    OP_ASSERT(d>0);
+    d--;
+  }
+  /*We only support version 1.x*/
+  if(OP_UNLIKELY(d!=1)||OP_UNLIKELY(*next++!='1'))return NULL;
+  if(OP_UNLIKELY(*next++!='.'))return NULL;
+  d=strspn(next,OP_HTTP_DIGIT);
+  if(OP_UNLIKELY(d<=0))return NULL;
+  /*"Leading zeros MUST be ignored by recipients."*/
+  while(*next=='0'){
+    next++;
+    OP_ASSERT(d>0);
+    d--;
+  }
+  /*We don't need to parse the version number.
+    Any non-zero digit means it's greater than 1.*/
+  v1_1_compat=d>0;
+  next+=d;
+  if(OP_UNLIKELY(*next++!=' '))return NULL;
+  status_code=next;
+  d=strspn(next,OP_HTTP_DIGIT);
+  if(OP_UNLIKELY(d!=3))return NULL;
+  next+=d;
+  /*The Reason-Phrase can be empty, but the space must be here.*/
+  if(OP_UNLIKELY(*next++!=' '))return NULL;
+  next+=strcspn(next,OP_HTTP_CREASON_PHRASE);
+  /*We are not mandating this be present thanks to broken servers.*/
+  if(OP_LIKELY(*next=='\r'))next++;
+  if(OP_UNLIKELY(*next++!='\n'))return NULL;
+  if(_v1_1_compat!=NULL)*_v1_1_compat=v1_1_compat;
+  *_status_code=status_code;
+  return next;
+}
+
+/*Get the next response header.
+  [out] _header: The header token, NUL-terminated, with leading and trailing
+                  whitespace stripped, and converted to lower case (to simplify
+                  case-insensitive comparisons), or NULL if there are no more
+                  response headers.
+  [out] _cdr:    The remaining contents of the header, excluding the initial
+                  colon (':') and the terminating CRLF ("\r\n"),
+                  NUL-terminated, and with leading and trailing whitespace
+                  stripped, or NULL if there are no more response headers.
+  [inout] _s:    On input, this points to the start of the current line of the
+                  response headers.
+                 On output, it points to the start of the first line following
+                  this header, or NULL if there are no more response headers.
+  Return: 0 on success, or a negative value on failure.*/
+static int op_http_get_next_header(char **_header,char **_cdr,char **_s){
+  char   *header;
+  char   *header_end;
+  char   *cdr;
+  char   *cdr_end;
+  char   *next;
+  size_t  d;
+  next=*_s;
+  /*The second case is for broken servers.*/
+  if(next[0]=='\r'&&next[1]=='\n'||OP_UNLIKELY(next[0]=='\n')){
+    /*No more headers.*/
+    *_header=NULL;
+    *_cdr=NULL;
+    *_s=NULL;
+    return 0;
+  }
+  header=next+op_http_lwsspn(next);
+  d=strcspn(header,OP_HTTP_CTOKEN);
+  if(OP_UNLIKELY(d<=0))return OP_FALSE;
+  header_end=header+d;
+  next=header_end+op_http_lwsspn(header_end);
+  if(OP_UNLIKELY(*next++!=':'))return OP_FALSE;
+  next+=op_http_lwsspn(next);
+  cdr=next;
+  do{
+    cdr_end=next+strcspn(next,OP_HTTP_CTLS);
+    next=cdr_end+op_http_lwsspn(cdr_end);
+  }
+  while(next>cdr_end);
+  /*We are not mandating this be present thanks to broken servers.*/
+  if(OP_LIKELY(*next=='\r'))next++;
+  if(OP_UNLIKELY(*next++!='\n'))return OP_FALSE;
+  *header_end='\0';
+  *cdr_end='\0';
+  /*Field names are case-insensitive.*/
+  op_string_tolower(header);
+  *_header=header;
+  *_cdr=cdr;
+  *_s=next;
+  return 0;
+}
+
+static opus_int64 op_http_parse_nonnegative_int64(const char **_next,
+ const char *_cdr){
+  const char *next;
+  opus_int64  ret;
+  int         i;
+  next=_cdr+strspn(_cdr,OP_HTTP_DIGIT);
+  *_next=next;
+  if(OP_UNLIKELY(next<=_cdr))return OP_FALSE;
+  while(*_cdr=='0')_cdr++;
+  if(OP_UNLIKELY(next-_cdr>19))return OP_EIMPL;
+  ret=0;
+  for(i=0;i<next-_cdr;i++){
+    int digit;
+    digit=_cdr[i]-'0';
+    /*Check for overflow.*/
+    if(OP_UNLIKELY(ret>(OP_INT64_MAX-9)/10+(digit<=7)))return OP_EIMPL;
+    ret=ret*10+digit;
+  }
+  return ret;
+}
+
+static opus_int64 op_http_parse_content_length(const char *_cdr){
+  const char *next;
+  opus_int64  content_length;
+  content_length=op_http_parse_nonnegative_int64(&next,_cdr);
+  if(OP_UNLIKELY(*next!='\0'))return OP_FALSE;
+  return content_length;
+}
+
+static int op_http_parse_content_range(opus_int64 *_first,opus_int64 *_last,
+ opus_int64 *_length,const char *_cdr){
+  opus_int64 first;
+  opus_int64 last;
+  opus_int64 length;
+  size_t     d;
+  if(OP_UNLIKELY(op_strncasecmp(_cdr,"bytes",5)!=0))return OP_FALSE;
+  _cdr+=5;
+  d=op_http_lwsspn(_cdr);
+  if(OP_UNLIKELY(d<=0))return OP_FALSE;
+  _cdr+=d;
+  if(*_cdr!='*'){
+    first=op_http_parse_nonnegative_int64(&_cdr,_cdr);
+    if(OP_UNLIKELY(first<0))return (int)first;
+    _cdr+=op_http_lwsspn(_cdr);
+    if(*_cdr++!='-')return OP_FALSE;
+    _cdr+=op_http_lwsspn(_cdr);
+    last=op_http_parse_nonnegative_int64(&_cdr,_cdr);
+    if(OP_UNLIKELY(last<0))return (int)last;
+    _cdr+=op_http_lwsspn(_cdr);
+  }
+  else{
+    /*This is for a 416 response (Requested range not satisfiable).*/
+    first=last=-1;
+    _cdr++;
+  }
+  if(OP_UNLIKELY(*_cdr++!='/'))return OP_FALSE;
+  if(*_cdr!='*'){
+    length=op_http_parse_nonnegative_int64(&_cdr,_cdr);
+    if(OP_UNLIKELY(length<0))return (int)length;
+  }
+  else{
+    /*The total length is unspecified.*/
+    _cdr++;
+    length=-1;
+  }
+  if(OP_UNLIKELY(*_cdr!='\0'))return OP_FALSE;
+  if(OP_UNLIKELY(last<first))return OP_FALSE;
+  if(length>=0&&OP_UNLIKELY(last>=length))return OP_FALSE;
+  *_first=first;
+  *_last=last;
+  *_length=length;
+  return 0;
+}
+
+/*Parse the Connection response header and look for a "close" token.
+  Return: 1 if a "close" token is found, 0 if it's not found, and a negative
+           value on error.*/
+static int op_http_parse_connection(char *_cdr){
+  size_t d;
+  int    ret;
+  ret=0;
+  for(;;){
+    d=strcspn(_cdr,OP_HTTP_CTOKEN);
+    if(OP_UNLIKELY(d<=0))return OP_FALSE;
+    if(op_strncasecmp(_cdr,"close",(int)d)==0)ret=1;
+    /*We're supposed to strip and ignore any headers mentioned in the
+       Connection header if this response is from an HTTP/1.0 server (to
+       work around forwarding of hop-by-hop headers by old proxies), but the
+       only hop-by-hop header we look at is Connection itself.
+      Everything else is a well-defined end-to-end header, and going back and
+       undoing the things we did based on already-examined headers would be
+       hard (since we only scan them once, in a destructive manner).
+      Therefore we just ignore all the other tokens.*/
+    _cdr+=d;
+    d=op_http_lwsspn(_cdr);
+    if(d<=0)break;
+    _cdr+=d;
+  }
+  return OP_UNLIKELY(*_cdr!='\0')?OP_FALSE:ret;
+}
+
+typedef int (*op_ssl_step_func)(SSL *_ssl_conn);
+
+/*Try to run an SSL function to completion (blocking if necessary).*/
+static int op_do_ssl_step(SSL *_ssl_conn,op_sock _fd,op_ssl_step_func _step){
+  struct pollfd fd;
+  fd.fd=_fd;
+  for(;;){
+    int ret;
+    int err;
+    ret=(*_step)(_ssl_conn);
+    if(ret>=0)return ret;
+    err=SSL_get_error(_ssl_conn,ret);
+    if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN;
+    else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT;
+    else return OP_FALSE;
+    if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_FALSE;
+  }
+}
+
+/*Implement a BIO type that just indicates every operation should be retried.
+  We use this when initializing an SSL connection via a proxy to allow the
+   initial handshake to proceed all the way up to the first read attempt, and
+   then return.
+  This allows the TLS client hello message to be pipelined with the HTTP
+   CONNECT request.*/
+
+static int op_bio_retry_write(BIO *_b,const char *_buf,int _num){
+  (void)_buf;
+  (void)_num;
+  BIO_clear_retry_flags(_b);
+  BIO_set_retry_write(_b);
+  return -1;
+}
+
+static int op_bio_retry_read(BIO *_b,char *_buf,int _num){
+  (void)_buf;
+  (void)_num;
+  BIO_clear_retry_flags(_b);
+  BIO_set_retry_read(_b);
+  return -1;
+}
+
+static int op_bio_retry_puts(BIO *_b,const char *_str){
+  return op_bio_retry_write(_b,_str,0);
+}
+
+static long op_bio_retry_ctrl(BIO *_b,int _cmd,long _num,void *_ptr){
+  long ret;
+  (void)_b;
+  (void)_num;
+  (void)_ptr;
+  ret=0;
+  switch(_cmd){
+    case BIO_CTRL_RESET:
+    case BIO_C_RESET_READ_REQUEST:{
+      BIO_clear_retry_flags(_b);
+      /*Fall through.*/
+    }
+    case BIO_CTRL_EOF:
+    case BIO_CTRL_SET:
+    case BIO_CTRL_SET_CLOSE:
+    case BIO_CTRL_FLUSH:
+    case BIO_CTRL_DUP:{
+      ret=1;
+    }break;
+  }
+  return ret;
+}
+
+static int op_bio_retry_new(BIO *_b){
+  _b->init=1;
+  _b->num=0;
+  _b->ptr=NULL;
+  return 1;
+}
+
+static int op_bio_retry_free(BIO *_b){
+  return _b!=NULL;
+}
+
+/*This is not const because OpenSSL doesn't allow it, even though it won't
+   write to it.*/
+static BIO_METHOD op_bio_retry_method={
+  BIO_TYPE_NULL,
+  "retry",
+  op_bio_retry_write,
+  op_bio_retry_read,
+  op_bio_retry_puts,
+  NULL,
+  op_bio_retry_ctrl,
+  op_bio_retry_new,
+  op_bio_retry_free,
+  NULL
+};
+
+/*Establish a CONNECT tunnel and pipeline the start of the TLS handshake for
+   proxying https URL requests.*/
+static int op_http_conn_establish_tunnel(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,op_sock _fd,SSL *_ssl_conn,BIO *_ssl_bio){
+  BIO  *retry_bio;
+  char *status_code;
+  char *next;
+  int   ret;
+  _conn->ssl_conn=NULL;
+  _conn->fd=_fd;
+  OP_ASSERT(_stream->proxy_connect.nbuf>0);
+  ret=op_http_conn_write_fully(_conn,
+   _stream->proxy_connect.buf,_stream->proxy_connect.nbuf);
+  if(OP_UNLIKELY(ret<0))return ret;
+  retry_bio=BIO_new(&op_bio_retry_method);
+  if(OP_UNLIKELY(retry_bio==NULL))return OP_EFAULT;
+  SSL_set_bio(_ssl_conn,retry_bio,_ssl_bio);
+  SSL_set_connect_state(_ssl_conn);
+  /*This shouldn't succeed, since we can't read yet.*/
+  OP_ALWAYS_TRUE(SSL_connect(_ssl_conn)<0);
+  SSL_set_bio(_ssl_conn,_ssl_bio,_ssl_bio);
+  /*Only now do we disable write coalescing, to allow the CONNECT
+     request and the start of the TLS handshake to be combined.*/
+  op_sock_set_tcp_nodelay(_fd,1);
+  ret=op_http_conn_read_response(_conn,&_stream->response);
+  if(OP_UNLIKELY(ret<0))return ret;
+  next=op_http_parse_status_line(NULL,&status_code,_stream->response.buf);
+  /*According to RFC 2817, "Any successful (2xx) response to a
+     CONNECT request indicates that the proxy has established a
+     connection to the requested host and port.*/
+  if(OP_UNLIKELY(next==NULL)||OP_UNLIKELY(status_code[0]!='2'))return OP_FALSE;
+  return 0;
+}
+
+/*Match a host name against a host with a possible wildcard pattern according
+   to the rules of RFC 6125 Section 6.4.3.
+  Return: 0 if the pattern doesn't match, and a non-zero value if it does.*/
+static int op_http_hostname_match(const char *_host,size_t _host_len,
+ ASN1_STRING *_pattern){
+  const char *pattern;
+  size_t      host_label_len;
+  size_t      host_suffix_len;
+  size_t      pattern_len;
+  size_t      pattern_label_len;
+  size_t      pattern_prefix_len;
+  size_t      pattern_suffix_len;
+  pattern=(const char *)ASN1_STRING_data(_pattern);
+  pattern_len=strlen(pattern);
+  /*Check the pattern for embedded NULs.*/
+  if(OP_UNLIKELY(pattern_len!=(size_t)ASN1_STRING_length(_pattern)))return 0;
+  pattern_label_len=strcspn(pattern,".");
+  OP_ASSERT(pattern_label_len<=pattern_len);
+  pattern_prefix_len=strcspn(pattern,"*");
+  if(pattern_prefix_len>=pattern_label_len){
+    /*"The client SHOULD NOT attempt to match a presented identifier in which
+       the wildcard character comprises a label other than the left-most label
+       (e.g., do not match bar.*.example.net)." [RFC 6125 Section 6.4.3]*/
+    if(pattern_prefix_len<pattern_len)return 0;
+    /*If the pattern does not contain a wildcard in the first element, do an
+       exact match.
+      Don't use the system strcasecmp here, as that uses the locale and
+       RFC 4343 makes clear that DNS's case-insensitivity only applies to
+       the ASCII range.*/
+    return _host_len==pattern_len&&op_strncasecmp(_host,pattern,_host_len)==0;
+  }
+  /*"However, the client SHOULD NOT attempt to match a presented identifier
+     where the wildcard character is embedded within an A-label or U-label of
+     an internationalized domain name." [RFC 6125 Section 6.4.3]*/
+  if(op_strncasecmp(pattern,"xn--",4)==0)return 0;
+  host_label_len=strcspn(_host,".");
+  /*Make sure the host has at least two dots, to prevent the wildcard match
+     from being ridiculously wide.
+    We should have already checked to ensure it had at least one.*/
+  if(OP_UNLIKELY(_host[host_label_len]!='.')
+   ||strchr(_host+host_label_len+1,'.')==NULL){
+    return 0;
+  }
+  OP_ASSERT(host_label_len<_host_len);
+  /*"If the wildcard character is the only character of the left-most label in
+     the presented identifier, the client SHOULD NOT compare against anything
+     but the left-most label of the reference identifier (e.g., *.example.com
+     would match foo.example.com but not bar.foo.example.com)." [RFC 6125
+     Section 6.4.3]
+    This is really confusingly worded, as we check this by actually comparing
+     the rest of the pattern for an exact match.
+    We also use the fact that the wildcard must match at least one character,
+     so the left-most label of the hostname must be at least as large as the
+     left-most label of the pattern.*/
+  if(host_label_len<pattern_label_len)return 0;
+  OP_ASSERT(pattern[pattern_prefix_len]=='*');
+  /*"The client MAY match a presented identifier in which the wildcard
+     character is not the only character of the label (e.g., baz*.example.net
+     and *baz.example.net and b*z.example.net would be taken to match
+     baz1.example.net and foobaz.example.net and buzz.example.net,
+     respectively)." [RFC 6125 Section 6.4.3]*/
+  pattern_suffix_len=pattern_len-pattern_prefix_len-1;
+  host_suffix_len=_host_len-host_label_len
+   +pattern_label_len-pattern_prefix_len-1;
+  return pattern_suffix_len==host_suffix_len
+   &&op_strncasecmp(_host,pattern,pattern_prefix_len)==0
+   &&op_strncasecmp(_host+_host_len-host_suffix_len,
+   pattern+pattern_prefix_len+1,host_suffix_len)==0;
+}
+
+/*Convert a host to a numeric address, if possible.
+  Return: A struct addrinfo containing the address, if it was numeric, and NULL
+           otherise.*/
+static struct addrinfo *op_inet_pton(const char *_host){
+  struct addrinfo *addrs;
+  struct addrinfo  hints;
+  memset(&hints,0,sizeof(hints));
+  hints.ai_socktype=SOCK_STREAM;
+  hints.ai_flags=AI_NUMERICHOST;
+  if(!getaddrinfo(_host,NULL,&hints,&addrs))return addrs;
+  return NULL;
+}
+
+/*Verify the server's hostname matches the certificate they presented using
+   the procedure from Section 6 of RFC 6125.
+  Return: 0 if the certificate doesn't match, and a non-zero value if it does.*/
+static int op_http_verify_hostname(OpusHTTPStream *_stream,SSL *_ssl_conn){
+  X509                   *peer_cert;
+  STACK_OF(GENERAL_NAME) *san_names;
+  char                   *host;
+  size_t                  host_len;
+  int                     ret;
+  host=_stream->url.host;
+  host_len=strlen(host);
+  peer_cert=SSL_get_peer_certificate(_ssl_conn);
+  /*We set VERIFY_PEER, so we shouldn't get here without a certificate.*/
+  if(OP_UNLIKELY(peer_cert==NULL))return 0;
+  ret=0;
+  OP_ASSERT(host_len<INT_MAX);
+  /*RFC 2818 says (after correcting for Eratta 1077): "If a subjectAltName
+     extension of type dNSName is present, that MUST be used as the identity.
+    Otherwise, the (most specific) Common Name field in the Subject field of
+     the certificate MUST be used.
+    Although the use of the Common Name is existing practice, it is deprecated
+     and Certification Authorities are encouraged to use the dNSName
+     instead."
+    "Matching is performed using the matching rules specified by RFC 2459.
+    If more than one identity of a given type is present in the certificate
+     (e.g., more than one dNSName name), a match in any one of the set is
+     considered acceptable.
+    Names may contain the wildcard character * which is condered to match any
+     single domain name component or component fragment.
+    E.g., *.a.com matches foo.a.com but not bar.foo.a.com.
+    f*.com matches foo.com but not bar.com."
+    "In some cases, the URI is specified as an IP address rather than a
+     hostname.
+    In this case, the iPAddress subjectAltName must be present in the
+     certificate and must exactly match the IP in the URI."*/
+  san_names=X509_get_ext_d2i(peer_cert,NID_subject_alt_name,NULL,NULL);
+  if(san_names!=NULL){
+    struct addrinfo *addr;
+    unsigned char   *ip;
+    int              ip_len;
+    int              nsan_names;
+    int              sni;
+    /*Check to see if the host was specified as a simple IP address.*/
+    addr=op_inet_pton(host);
+    ip=NULL;
+    ip_len=0;
+    if(addr!=NULL){
+      switch(addr->ai_family){
+        case AF_INET:{
+          struct sockaddr_in *s;
+          s=(struct sockaddr_in *)addr->ai_addr;
+          OP_ASSERT(addr->ai_addrlen>=sizeof(*s));
+          ip=(unsigned char *)&s->sin_addr;
+          ip_len=sizeof(s->sin_addr);
+        }break;
+        case AF_INET6:{
+          struct sockaddr_in6 *s;
+          s=(struct sockaddr_in6 *)addr->ai_addr;
+          OP_ASSERT(addr->ai_addrlen>=sizeof(*s));
+          ip=(unsigned char *)&s->sin6_addr;
+          ip_len=sizeof(s->sin6_addr);
+        }break;
+      }
+    }
+    /*We can only verify fully-qualified domain names.
+      To quote RFC 6125: "The extracted data MUST include only information that
+       can be securely parsed out of the inputs (e.g., parsing the fully
+       qualified DNS domain name out of the "host" component (or its
+       equivalent) of a URI or deriving the application service type from the
+       scheme of a URI) ..."
+      We don't have a way to check (without relying on DNS records, which might
+       be subverted) if this address is fully-qualified.
+      This is particularly problematic when using a CONNECT tunnel, as it is
+       the server that does DNS lookup, not us.
+      However, we are certain that if the hostname has no '.', it is definitely
+       not a fully-qualified domain name (with the exception of crazy TLDs that
+       actually resolve, like "uz", but I am willing to ignore those).
+      RFC 1535 says "...in any event where a '.' exists in a specified name it
+       should be assumed to be a fully qualified domain name (FQDN) and SHOULD
+       be tried as a rooted name first."
+      That doesn't give us any security guarantees, of course (a subverted DNS
+       could fail the original query and our resolver might still retry with a
+       local domain appended).
+      If we don't have a FQDN, just set the number of names to 0, so we'll fail
+       and clean up any resources we allocated.*/
+    if(ip==NULL&&strchr(host,'.')==NULL)nsan_names=0;
+    /*RFC 2459 says there MUST be at least one, but we don't depend on it.*/
+    else nsan_names=sk_GENERAL_NAME_num(san_names);
+    for(sni=0;sni<nsan_names;sni++){
+      const GENERAL_NAME *name;
+      name=sk_GENERAL_NAME_value(san_names,sni);
+      if(ip==NULL){
+        if(name->type==GEN_DNS
+         &&op_http_hostname_match(host,host_len,name->d.dNSName)){
+          ret=1;
+          break;
+        }
+      }
+      else if(name->type==GEN_IPADD){
+        unsigned char *cert_ip;
+        /*If we do have an IP address, compare it directly.
+          RFC 6125: "When the reference identity is an IP address, the identity
+           MUST be converted to the 'network byte order' octet string
+           representation.
+          For IP Version 4, as specified in RFC 791, the octet string will
+           contain exactly four octets.
+          For IP Version 6, as specified in RFC 2460, the octet string will
+           contain exactly sixteen octets.
+          This octet string is then compared against subjectAltName values of
+           type iPAddress.
+          A match occurs if the reference identity octet string and the value
+           octet strings are identical."*/
+        cert_ip=ASN1_STRING_data(name->d.iPAddress);
+        if(ip_len==ASN1_STRING_length(name->d.iPAddress)
+         &&memcmp(ip,cert_ip,ip_len)==0){
+          ret=1;
+          break;
+        }
+      }
+    }
+    sk_GENERAL_NAME_pop_free(san_names,GENERAL_NAME_free);
+    if(addr!=NULL)freeaddrinfo(addr);
+  }
+  /*Do the same FQDN check we did above.
+    We don't do this once in advance for both cases, because in the
+     subjectAltName case we might have an IPv6 address without a dot.*/
+  else if(strchr(host,'.')!=NULL){
+    int last_cn_loc;
+    int cn_loc;
+    /*If there is no subjectAltName, match against commonName.
+      RFC 6125 says that at least one significant CA is known to issue certs
+       with multiple CNs, although it SHOULD NOT.
+      It also says: "The server's identity may also be verified by comparing
+       the reference identity to the Common Name (CN) value in the last
+       Relative Distinguished Name (RDN) of the subject field of the server's
+       certificate (where "last" refers to the DER-encoded order...)."
+      So find the last one and check it.*/
+    cn_loc=-1;
+    do{
+      last_cn_loc=cn_loc;
+      cn_loc=X509_NAME_get_index_by_NID(X509_get_subject_name(peer_cert),
+       NID_commonName,last_cn_loc);
+    }
+    while(cn_loc>=0);
+    ret=last_cn_loc>=0
+     &&op_http_hostname_match(host,host_len,
+     X509_NAME_ENTRY_get_data(
+     X509_NAME_get_entry(X509_get_subject_name(peer_cert),last_cn_loc)));
+  }
+  X509_free(peer_cert);
+  return ret;
+}
+
+/*Perform the TLS handshake on a new connection.*/
+static int op_http_conn_start_tls(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ op_sock _fd,SSL *_ssl_conn){
+  SSL_SESSION *ssl_session;
+  BIO         *ssl_bio;
+  int          skip_certificate_check;
+  int          ret;
+  ssl_bio=BIO_new_socket(_fd,BIO_NOCLOSE);
+  if(OP_LIKELY(ssl_bio==NULL))return OP_FALSE;
+# if !defined(OPENSSL_NO_TLSEXT)
+  /*Support for RFC 6066 Server Name Indication.*/
+  SSL_set_tlsext_host_name(_ssl_conn,_stream->url.host);
+# endif
+  /*Resume a previous session if available.*/
+  if(_stream->ssl_session!=NULL){
+    SSL_set_session(_ssl_conn,_stream->ssl_session);
+  }
+  /*If we're proxying, establish the CONNECT tunnel.*/
+  if(_stream->proxy_connect.nbuf>0){
+    ret=op_http_conn_establish_tunnel(_stream,_conn,
+     _fd,_ssl_conn,ssl_bio);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  else{
+    /*Otherwise, just use this socket directly.*/
+    op_sock_set_tcp_nodelay(_fd,1);
+    SSL_set_bio(_ssl_conn,ssl_bio,ssl_bio);
+    SSL_set_connect_state(_ssl_conn);
+  }
+  ret=op_do_ssl_step(_ssl_conn,_fd,SSL_connect);
+  if(OP_UNLIKELY(ret<=0))return OP_FALSE;
+  ssl_session=_stream->ssl_session;
+  skip_certificate_check=_stream->skip_certificate_check;
+  if(ssl_session==NULL||!skip_certificate_check){
+    ret=op_do_ssl_step(_ssl_conn,_fd,SSL_do_handshake);
+    if(OP_UNLIKELY(ret<=0))return OP_FALSE;
+    /*OpenSSL does not do hostname verification, despite the fact that we just
+       passed it the hostname above in the call to SSL_set_tlsext_host_name(),
+       because they are morons.
+      Do it for them.*/
+    if(!skip_certificate_check&&!op_http_verify_hostname(_stream,_ssl_conn)){
+      return OP_FALSE;
+    }
+    if(ssl_session==NULL){
+      /*Save the session for later resumption.*/
+      _stream->ssl_session=SSL_get1_session(_ssl_conn);
+    }
+  }
+  _conn->ssl_conn=_ssl_conn;
+  _conn->fd=_fd;
+  _conn->nrequests_left=OP_PIPELINE_MAX_REQUESTS;
+  return 0;
+}
+
+/*Try to start a connection to the next address in the given list of a given
+   type.
+  _fd:           The socket to connect with.
+  [inout] _addr: A pointer to the list of addresses.
+                 This will be advanced to the first one that matches the given
+                  address family (possibly the current one).
+  _ai_family:    The address family to connect to.
+  Return: 1        If the connection was successful.
+          0        If the connection is in progress.
+          OP_FALSE If the connection failed and there were no more addresses
+                    left to try.
+                    *_addr will be set to NULL in this case.*/
+static int op_sock_connect_next(op_sock _fd,
+ const struct addrinfo **_addr,int _ai_family){
+  const struct addrinfo *addr;
+  int                    err;
+  addr=*_addr;
+  for(;;){
+    /*Move to the next address of the requested type.*/
+    for(;addr!=NULL&&addr->ai_family!=_ai_family;addr=addr->ai_next);
+    *_addr=addr;
+    /*No more: failure.*/
+    if(addr==NULL)return OP_FALSE;
+    if(connect(_fd,addr->ai_addr,addr->ai_addrlen)>=0)return 1;
+    err=op_errno();
+    /*Winsock will set WSAEWOULDBLOCK.*/
+    if(OP_LIKELY(err==EINPROGRESS||err==EWOULDBLOCK))return 0;
+    addr=addr->ai_next;
+  }
+}
+
+/*The number of address families to try connecting to simultaneously.*/
+# define OP_NPROTOS (2)
+
+static int op_http_connect_impl(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ const struct addrinfo *_addrs,struct timeb *_start_time){
+  const struct addrinfo *addr;
+  const struct addrinfo *addrs[OP_NPROTOS];
+  struct pollfd          fds[OP_NPROTOS];
+  int                    ai_family;
+  int                    nprotos;
+  int                    ret;
+  int                    pi;
+  int                    pj;
+  for(pi=0;pi<OP_NPROTOS;pi++)addrs[pi]=NULL;
+  /*Try connecting via both IPv4 and IPv6 simultaneously, and keep the first
+     one that succeeds.
+    Start by finding the first address from each family.
+    We order the first connection attempts in the same order the address
+     families were returned in the DNS records in accordance with RFC 6555.*/
+  for(addr=_addrs,nprotos=0;addr!=NULL&&nprotos<OP_NPROTOS;addr=addr->ai_next){
+    if(addr->ai_family==AF_INET6||addr->ai_family==AF_INET){
+      OP_ASSERT(addr->ai_addrlen<=sizeof(struct sockaddr_in6));
+      OP_ASSERT(addr->ai_addrlen<=sizeof(struct sockaddr_in));
+      /*If we've seen this address family before, skip this address for now.*/
+      for(pi=0;pi<nprotos;pi++)if(addrs[pi]->ai_family==addr->ai_family)break;
+      if(pi<nprotos)continue;
+      addrs[nprotos++]=addr;
+    }
+  }
+  /*Pop the connection off the free list and put it on the LRU list.*/
+  OP_ASSERT(_stream->free_head==_conn);
+  _stream->free_head=_conn->next;
+  _conn->next=_stream->lru_head;
+  _stream->lru_head=_conn;
+  ftime(_start_time);
+  *&_conn->read_time=*_start_time;
+  _conn->read_bytes=0;
+  _conn->read_rate=0;
+  /*Try to start a connection to each protocol.
+    RFC 6555 says it is RECOMMENDED that connection attempts be paced
+     150...250 ms apart "to balance human factors against network load", but
+     that "stateful algorithms" (that's us) "are expected to be more
+     aggressive".
+    We are definitely more aggressive: we don't pace at all.*/
+  for(pi=0;pi<nprotos;pi++){
+    ai_family=addrs[pi]->ai_family;
+    fds[pi].fd=socket(ai_family,SOCK_STREAM,addrs[pi]->ai_protocol);
+    fds[pi].events=POLLOUT;
+    if(OP_LIKELY(fds[pi].fd!=OP_INVALID_SOCKET)){
+      if(OP_LIKELY(op_sock_set_nonblocking(fds[pi].fd,1)>=0)){
+        ret=op_sock_connect_next(fds[pi].fd,addrs+pi,ai_family);
+        if(OP_UNLIKELY(ret>0)){
+          /*It succeeded right away (technically possible), so stop.*/
+          nprotos=pi+1;
+          break;
+        }
+        /*Otherwise go on to the next protocol, and skip the clean-up below.*/
+        else if(ret==0)continue;
+        /*Tried all the addresses for this protocol.*/
+      }
+      /*Clean up the socket.*/
+      close(fds[pi].fd);
+    }
+    /*Remove this protocol from the list.*/
+    memmove(addrs+pi,addrs+pi+1,sizeof(*addrs)*(nprotos-pi-1));
+    nprotos--;
+    pi--;
+  }
+  /*Wait for one of the connections to finish.*/
+  while(pi>=nprotos&&nprotos>0&&poll(fds,nprotos,OP_POLL_TIMEOUT_MS)>0){
+    for(pi=0;pi<nprotos;pi++){
+      socklen_t errlen;
+      int       err;
+      /*Still waiting...*/
+      if(!fds[pi].revents)continue;
+      errlen=sizeof(err);
+      /*Some platforms will return the pending error in &err and return 0.
+        Others will put it in errno and return -1.*/
+      ret=getsockopt(fds[pi].fd,SOL_SOCKET,SO_ERROR,&err,&errlen);
+      if(ret<0)err=op_errno();
+      /*Success!*/
+      if(err==0||err==EISCONN)break;
+      /*Move on to the next address for this protocol.*/
+      ai_family=addrs[pi]->ai_family;
+      addrs[pi]=addrs[pi]->ai_next;
+      ret=op_sock_connect_next(fds[pi].fd,addrs+pi,ai_family);
+      /*It succeeded right away, so stop.*/
+      if(ret>0)break;
+      /*Otherwise go on to the next protocol, and skip the clean-up below.*/
+      else if(ret==0)continue;
+      /*Tried all the addresses for this protocol.
+        Remove it from the list.*/
+      close(fds[pi].fd);
+      memmove(fds+pi,fds+pi+1,sizeof(*fds)*(nprotos-pi-1));
+      memmove(addrs+pi,addrs+pi+1,sizeof(*addrs)*(nprotos-pi-1));
+      nprotos--;
+      pi--;
+    }
+  }
+  /*Close all the other sockets.*/
+  for(pj=0;pj<nprotos;pj++)if(pi!=pj)close(fds[pj].fd);
+  /*If none of them succeeded, we're done.*/
+  if(pi>=nprotos)return OP_FALSE;
+  /*Save this address for future connection attempts.*/
+  if(addrs[pi]!=&_stream->addr_info){
+    memcpy(&_stream->addr_info,addrs[pi],sizeof(_stream->addr_info));
+    _stream->addr_info.ai_addr=&_stream->addr.s;
+    _stream->addr_info.ai_next=NULL;
+    memcpy(&_stream->addr,addrs[pi]->ai_addr,addrs[pi]->ai_addrlen);
+  }
+  if(OP_URL_IS_SSL(&_stream->url)){
+    SSL *ssl_conn;
+    /*Start the SSL connection.*/
+    OP_ASSERT(_stream->ssl_ctx!=NULL);
+    ssl_conn=SSL_new(_stream->ssl_ctx);
+    if(OP_LIKELY(ssl_conn!=NULL)){
+      ret=op_http_conn_start_tls(_stream,_conn,fds[pi].fd,ssl_conn);
+      if(OP_LIKELY(ret>=0))return ret;
+      SSL_free(ssl_conn);
+    }
+    close(fds[pi].fd);
+    _conn->fd=OP_INVALID_SOCKET;
+    return OP_FALSE;
+  }
+  /*Just a normal non-SSL connection.*/
+  _conn->ssl_conn=NULL;
+  _conn->fd=fds[pi].fd;
+  _conn->nrequests_left=OP_PIPELINE_MAX_REQUESTS;
+  /*Disable write coalescing.
+    We always send whole requests at once and always parse the response headers
+     before sending another one.*/
+  op_sock_set_tcp_nodelay(fds[pi].fd,1);
+  return 0;
+}
+
+static int op_http_connect(OpusHTTPStream *_stream,OpusHTTPConn *_conn,
+ const struct addrinfo *_addrs,struct timeb *_start_time){
+  struct timeb     resolve_time;
+  struct addrinfo *new_addrs;
+  int              ret;
+  /*Re-resolve the host if we need to (RFC 6555 says we MUST do so
+     occasionally).*/
+  new_addrs=NULL;
+  ftime(&resolve_time);
+  if(_addrs!=&_stream->addr_info||op_time_diff_ms(&resolve_time,
+   &_stream->resolve_time)>=OP_RESOLVE_CACHE_TIMEOUT_MS){
+    new_addrs=op_resolve(_stream->connect_host,_stream->connect_port);
+    if(OP_LIKELY(new_addrs!=NULL)){
+      _addrs=new_addrs;
+      *&_stream->resolve_time=*&resolve_time;
+    }
+    else if(OP_LIKELY(_addrs==NULL))return OP_FALSE;
+  }
+  ret=op_http_connect_impl(_stream,_conn,_addrs,_start_time);
+  if(new_addrs!=NULL)freeaddrinfo(new_addrs);
+  return ret;
+}
+
+# define OP_BASE64_LENGTH(_len) (((_len)+2)/3*4)
+
+static const char BASE64_TABLE[64]={
+  'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
+  'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
+  'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
+  'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
+};
+
+static char *op_base64_encode(char *_dst,const char *_src,int _len){
+  unsigned s0;
+  unsigned s1;
+  unsigned s2;
+  int      ngroups;
+  int      i;
+  ngroups=_len/3;
+  for(i=0;i<ngroups;i++){
+    s0=_src[3*i+0];
+    s1=_src[3*i+1];
+    s2=_src[3*i+2];
+    _dst[4*i+0]=BASE64_TABLE[s0>>2];
+    _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4|s1>>4];
+    _dst[4*i+2]=BASE64_TABLE[(s1&15)<<2|s2>>6];
+    _dst[4*i+3]=BASE64_TABLE[s2&63];
+  }
+  _len-=3*i;
+  if(_len==1){
+    s0=_src[3*i+0];
+    _dst[4*i+0]=BASE64_TABLE[s0>>2];
+    _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4];
+    _dst[4*i+2]='=';
+    _dst[4*i+3]='=';
+    i++;
+  }
+  else if(_len==2){
+    s0=_src[3*i+0];
+    s1=_src[3*i+1];
+    _dst[4*i+0]=BASE64_TABLE[s0>>2];
+    _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4|s1>>4];
+    _dst[4*i+2]=BASE64_TABLE[(s1&15)<<2];
+    _dst[4*i+3]='=';
+    i++;
+  }
+  _dst[4*i]='\0';
+  return _dst+4*i;
+}
+
+/*Construct an HTTP authorization header using RFC 2617's Basic Authentication
+   Scheme and append it to the given string buffer.*/
+static int op_sb_append_basic_auth_header(OpusStringBuf *_sb,
+ const char *_header,const char *_user,const char *_pass){
+  int user_len;
+  int pass_len;
+  int user_pass_len;
+  int base64_len;
+  int nbuf_total;
+  int ret;
+  ret=op_sb_append_string(_sb,_header);
+  ret|=op_sb_append(_sb,": Basic ",8);
+  user_len=strlen(_user);
+  pass_len=strlen(_pass);
+  if(OP_UNLIKELY(pass_len>INT_MAX-user_len))return OP_EFAULT;
+  if(OP_UNLIKELY(user_len+pass_len>(INT_MAX>>2)*3-3))return OP_EFAULT;
+  user_pass_len=user_len+1+pass_len;
+  base64_len=OP_BASE64_LENGTH(user_pass_len);
+  /*Stick "user:pass" at the end of the buffer so we can Base64 encode it
+     in-place.*/
+  nbuf_total=_sb->nbuf;
+  if(OP_UNLIKELY(base64_len>INT_MAX-nbuf_total))return OP_EFAULT;
+  nbuf_total+=base64_len;
+  ret|=op_sb_ensure_capacity(_sb,nbuf_total);
+  if(OP_UNLIKELY(ret<0))return ret;
+  _sb->nbuf=nbuf_total-user_pass_len;
+  OP_ALWAYS_TRUE(!op_sb_append(_sb,_user,user_len));
+  OP_ALWAYS_TRUE(!op_sb_append(_sb,":",1));
+  OP_ALWAYS_TRUE(!op_sb_append(_sb,_pass,pass_len));
+  op_base64_encode(_sb->buf+nbuf_total-base64_len,
+   _sb->buf+nbuf_total-user_pass_len,user_pass_len);
+  return op_sb_append(_sb,"\r\n",2);
+}
+
+static int op_http_allow_pipelining(const char *_server){
+  /*Servers known to do bad things with pipelined requests.
+    This list is taken from Gecko's nsHttpConnection::SupportsPipelining() (in
+     netwerk/protocol/http/nsHttpConnection.cpp).*/
+  static const char *BAD_SERVERS[]={
+    "EFAServer/",
+    "Microsoft-IIS/4.",
+    "Microsoft-IIS/5.",
+    "Netscape-Enterprise/3.",
+    "Netscape-Enterprise/4.",
+    "Netscape-Enterprise/5.",
+    "Netscape-Enterprise/6.",
+    "WebLogic 3.",
+    "WebLogic 4.",
+    "WebLogic 5.",
+    "WebLogic 6.",
+    "Winstone Servlet Engine v0."
+  };
+# define NBAD_SERVERS ((int)(sizeof(BAD_SERVERS)/sizeof(*BAD_SERVERS)))
+  if(*_server>='E'&&*_server<='W'){
+    int si;
+    for(si=0;si<NBAD_SERVERS;si++){
+      if(strncmp(_server,BAD_SERVERS[si],strlen(BAD_SERVERS[si]))==0){
+        return 0;
+      }
+    }
+  }
+  return 1;
+# undef NBAD_SERVERS
+}
+
+static int op_http_stream_open(OpusHTTPStream *_stream,const char *_url,
+ int _skip_certificate_check,const char *_proxy_host,unsigned _proxy_port,
+ const char *_proxy_user,const char *_proxy_pass,OpusServerInfo *_info){
+  struct addrinfo *addrs;
+  int              nredirs;
+  int              ret;
+#if defined(_WIN32)
+  op_init_winsock();
+#endif
+  ret=op_parse_url(&_stream->url,_url);
+  if(OP_UNLIKELY(ret<0))return ret;
+  if(_proxy_host!=NULL){
+    if(OP_UNLIKELY(_proxy_port>65535U))return OP_EINVAL;
+    _stream->connect_host=op_string_dup(_proxy_host);
+    _stream->connect_port=_proxy_port;
+  }
+  else{
+    _stream->connect_host=_stream->url.host;
+    _stream->connect_port=_stream->url.port;
+  }
+  addrs=NULL;
+  for(nredirs=0;nredirs<OP_REDIRECT_LIMIT;nredirs++){
+    OpusParsedURL  next_url;
+    struct timeb   start_time;
+    struct timeb   end_time;
+    char          *next;
+    char          *status_code;
+    int            minor_version_pos;
+    int            v1_1_compat;
+    /*Initialize the SSL library if necessary.*/
+    if(OP_URL_IS_SSL(&_stream->url)&&_stream->ssl_ctx==NULL){
+      SSL_CTX *ssl_ctx;
+# if !defined(OPENSSL_NO_LOCKING)
+      /*The documentation says SSL_library_init() is not reentrant.
+        We don't want to add our own depenencies on a threading library, and it
+         appears that it's safe to call OpenSSL's locking functions before the
+         library is initialized, so that's what we'll do (really OpenSSL should
+         do this for us).
+        This doesn't guarantee that _other_ threads in the application aren't
+         calling SSL_library_init() at the same time, but there's not much we
+         can do about that.*/
+      CRYPTO_w_lock(CRYPTO_LOCK_SSL);
+# endif
+      SSL_library_init();
+      /*Needed to get SHA2 algorithms with old OpenSSL versions.*/
+      OpenSSL_add_ssl_algorithms();
+# if !defined(OPENSSL_NO_LOCKING)
+      CRYPTO_w_unlock(CRYPTO_LOCK_SSL);
+# endif
+      ssl_ctx=SSL_CTX_new(SSLv23_client_method());
+      if(ssl_ctx==NULL)return OP_EFAULT;
+      if(!_skip_certificate_check){
+        /*We don't do anything if this fails, since it just means we won't load
+           any certificates (and thus all checks will fail).
+          However, as that is probably the result of a system
+           mis-configuration, assert here to make it easier to identify.*/
+        OP_ALWAYS_TRUE(SSL_CTX_set_default_verify_paths(ssl_ctx));
+        SSL_CTX_set_verify(ssl_ctx,SSL_VERIFY_PEER,NULL);
+      }
+      _stream->ssl_ctx=ssl_ctx;
+      _stream->skip_certificate_check=_skip_certificate_check;
+      if(_proxy_host!=NULL){
+        /*We need to establish a CONNECT tunnel to handle https proxying.
+          Build the request we'll send to do so.*/
+        _stream->proxy_connect.nbuf=0;
+        ret=op_sb_append(&_stream->proxy_connect,"CONNECT ",8);
+        ret|=op_sb_append_string(&_stream->proxy_connect,_stream->url.host);
+        ret|=op_sb_append_port(&_stream->proxy_connect,_stream->url.port);
+        /*CONNECT requires at least HTTP 1.1.*/
+        ret|=op_sb_append(&_stream->proxy_connect," HTTP/1.1\r\n",11);
+        ret|=op_sb_append(&_stream->proxy_connect,"Host: ",6);
+        ret|=op_sb_append_string(&_stream->proxy_connect,_stream->url.host);
+        /*The example in RFC 2817 Section 5.2 specifies an explicit port even
+           when connecting to the default port.
+          Given that the proxy doesn't know whether we're trying to connect to
+           an http or an https URL except by the port number, this seems like a
+           good idea.*/
+        ret|=op_sb_append_port(&_stream->proxy_connect,_stream->url.port);
+        ret|=op_sb_append(&_stream->proxy_connect,"\r\n",2);
+        ret|=op_sb_append(&_stream->proxy_connect,"User-Agent: .\r\n",15);
+        if(_proxy_user!=NULL&&_proxy_pass!=NULL){
+          ret|=op_sb_append_basic_auth_header(&_stream->proxy_connect,
+           "Proxy-Authorization",_proxy_user,_proxy_pass);
+        }
+        /*For backwards compatibility.*/
+        ret|=op_sb_append(&_stream->proxy_connect,
+         "Proxy-Connection: keep-alive\r\n",30);
+        ret|=op_sb_append(&_stream->proxy_connect,"\r\n",2);
+        if(OP_UNLIKELY(ret<0))return ret;
+      }
+    }
+    /*Actually make the connection.*/
+    ret=op_http_connect(_stream,_stream->conns+0,addrs,&start_time);
+    if(OP_UNLIKELY(ret<0))return ret;
+    /*Build the request to send.*/
+    _stream->request.nbuf=0;
+    ret=op_sb_append(&_stream->request,"GET ",4);
+    ret|=op_sb_append_string(&_stream->request,
+     _proxy_host!=NULL?_url:_stream->url.path);
+    /*Send HTTP/1.0 by default for maximum compatibility (so we don't have to
+       re-try if HTTP/1.1 fails, though it shouldn't, even for a 1.0 server).
+      This means we aren't conditionally compliant with RFC 2145, because we
+       violate the requirement that "An HTTP client SHOULD send a request
+       version equal to the highest version for which the client is at least
+       conditionally compliant...".
+      According to RFC 2145, that means we can't claim any compliance with any
+       IETF HTTP specification.*/
+    ret|=op_sb_append(&_stream->request," HTTP/1.0\r\n",11);
+    /*Remember where this is so we can upgrade to HTTP/1.1 if the server
+       supports it.*/
+    minor_version_pos=_stream->request.nbuf-3;
+    ret|=op_sb_append(&_stream->request,"Host: ",6);
+    ret|=op_sb_append_string(&_stream->request,_stream->url.host);
+    if(!OP_URL_IS_DEFAULT_PORT(&_stream->url)){
+      ret|=op_sb_append_port(&_stream->request,_stream->url.port);
+    }
+    ret|=op_sb_append(&_stream->request,"\r\n",2);
+    /*User-Agents have been a bad idea, so send as little as possible.
+      RFC 2616 requires at least one token in the User-Agent, which must have
+       at least one character.*/
+    ret|=op_sb_append(&_stream->request,"User-Agent: .\r\n",15);
+    if(_proxy_host!=NULL&&!OP_URL_IS_SSL(&_stream->url)
+     &&_proxy_user!=NULL&&_proxy_pass!=NULL){
+      ret|=op_sb_append_basic_auth_header(&_stream->request,
+       "Proxy-Authorization",_proxy_user,_proxy_pass);
+    }
+    if(_stream->url.user!=NULL&&_stream->url.pass!=NULL){
+      ret|=op_sb_append_basic_auth_header(&_stream->request,
+       "Authorization",_stream->url.user,_stream->url.pass);
+    }
+    /*Always send a Referer [sic] header.
+      It's common to refuse to serve a resource unless one is present.
+      We just use the relative "/" URI to suggest we came from the same domain,
+       as this is the most common check.
+      This might violate RFC 2616's mandate that the field "MUST NOT be sent if
+       the Request-URI was obtained from a source that does not have its own
+       URI, such as input from the user keyboard," but we don't really have any
+       way to know.*/
+    /*TODO: Should we update this on redirects?*/
+    ret|=op_sb_append(&_stream->request,"Referer: /\r\n",12);
+    /*Always send a Range request header to find out if we're seekable.
+      This requires an HTTP/1.1 server to succeed, but we'll still get what we
+       want with an HTTP/1.0 server that ignores this request header.*/
+    ret|=op_sb_append(&_stream->request,"Range: bytes=0-\r\n",17);
+    /*Remember where this is so we can append offsets to it later.*/
+    _stream->request_tail=_stream->request.nbuf-4;
+    ret|=op_sb_append(&_stream->request,"\r\n",2);
+    if(OP_UNLIKELY(ret<0))return ret;
+    ret=op_http_conn_write_fully(_stream->conns+0,
+     _stream->request.buf,_stream->request.nbuf);
+    if(OP_UNLIKELY(ret<0))return ret;
+    ret=op_http_conn_read_response(_stream->conns+0,&_stream->response);
+    if(OP_UNLIKELY(ret<0))return ret;
+    ftime(&end_time);
+    next=op_http_parse_status_line(&v1_1_compat,&status_code,
+     _stream->response.buf);
+    if(OP_UNLIKELY(next==NULL))return OP_FALSE;
+    if(status_code[0]=='2'){
+      opus_int64 content_length;
+      opus_int64 range_length;
+      int        pipeline_supported;
+      int        pipeline_disabled;
+      /*We only understand 20x codes.*/
+      if(status_code[1]!='0')return OP_FALSE;
+      content_length=-1;
+      range_length=-1;
+      /*Pipelining must be explicitly enabled.*/
+      pipeline_supported=0;
+      pipeline_disabled=0;
+      for(;;){
+        char *header;
+        char *cdr;
+        ret=op_http_get_next_header(&header,&cdr,&next);
+        if(OP_UNLIKELY(ret<0))return ret;
+        if(header==NULL)break;
+        if(strcmp(header,"content-length")==0){
+          /*Two Content-Length headers?*/
+          if(OP_UNLIKELY(content_length>=0))return OP_FALSE;
+          content_length=op_http_parse_content_length(cdr);
+          if(OP_UNLIKELY(content_length<0))return (int)content_length;
+          /*Make sure the Content-Length and Content-Range headers match.*/
+          if(range_length>=0&&OP_UNLIKELY(content_length!=range_length)){
+            return OP_FALSE;
+          }
+        }
+        else if(strcmp(header,"content-range")==0){
+          opus_int64 range_first;
+          opus_int64 range_last;
+          /*Two Content-Range headers?*/
+          if(OP_UNLIKELY(range_length>=0))return OP_FALSE;
+          ret=op_http_parse_content_range(&range_first,&range_last,
+           &range_length,cdr);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*"A response with satus code 206 (Partial Content) MUST NOT
+             include a Content-Range field with a byte-range-resp-spec of
+             '*'."*/
+          if(status_code[2]=='6'
+           &&(OP_UNLIKELY(range_first<0)||OP_UNLIKELY(range_last<0))){
+            return OP_FALSE;
+          }
+          /*We asked for the entire resource.*/
+          if(range_length>=0){
+            /*Quit if we didn't get it.*/
+            if(range_last>=0&&OP_UNLIKELY(range_last!=range_length-1)){
+              return OP_FALSE;
+            }
+          }
+          /*If there was no length, use the end of the range.*/
+          else if(range_last>=0)range_length=range_last+1;
+          /*Make sure the Content-Length and Content-Range headers match.*/
+          if(content_length>=0&&OP_UNLIKELY(content_length!=range_length)){
+            return OP_FALSE;
+          }
+        }
+        else if(strcmp(header,"connection")==0){
+          /*According to RFC 2616, if an HTTP/1.1 application does not support
+             pipelining, it "MUST include the 'close' connection option in
+             every message."
+            Therefore, if we receive one in the initial response, disable
+             pipelining entirely.
+            The server still might support it (e.g., we might just have hit the
+             request limit for a temporary child process), but if it doesn't
+             and we assume it does, every time we cross a chunk boundary we'll
+             error out and reconnect, adding lots of latency.*/
+          ret=op_http_parse_connection(cdr);
+          if(OP_UNLIKELY(ret<0))return ret;
+          pipeline_disabled|=ret;
+        }
+        else if(strcmp(header,"server")==0){
+          /*If we got a Server response header, and it wasn't from a known-bad
+             server, enable pipelining, as long as it's at least HTTP/1.1.
+            According to RFC 2145, the server is supposed to respond with the
+             highest minor version number it supports unless it is known or
+             suspected that we incorrectly implement the HTTP specification.
+            So it should send back at least HTTP/1.1, despite our HTTP/1.0
+             request.*/
+          pipeline_supported=v1_1_compat;
+          if(v1_1_compat)pipeline_disabled|=!op_http_allow_pipelining(cdr);
+          if(_info!=NULL&&_info->server==NULL)_info->server=op_string_dup(cdr);
+        }
+        /*Collect station information headers if the caller requested it.
+          If there's more than one copy of a header, the first one wins.*/
+        else if(_info!=NULL){
+          if(strcmp(header,"content-type")==0){
+            if(_info->content_type==NULL){
+              _info->content_type=op_string_dup(cdr);
+            }
+          }
+          else if(header[0]=='i'&&header[1]=='c'
+           &&(header[2]=='e'||header[2]=='y')&&header[3]=='-'){
+            if(strcmp(header+4,"name")==0){
+              if(_info->name==NULL)_info->name=op_string_dup(cdr);
+            }
+            else if(strcmp(header+4,"description")==0){
+              if(_info->description==NULL)_info->description=op_string_dup(cdr);
+            }
+            else if(strcmp(header+4,"genre")==0){
+              if(_info->genre==NULL)_info->genre=op_string_dup(cdr);
+            }
+            else if(strcmp(header+4,"url")==0){
+              if(_info->url==NULL)_info->url=op_string_dup(cdr);
+            }
+            else if(strcmp(header,"icy-br")==0
+             ||strcmp(header,"ice-bitrate")==0){
+              if(_info->bitrate_kbps<0){
+                opus_int64 bitrate_kbps;
+                /*Just re-using this function to parse a random unsigned
+                   integer field.*/
+                bitrate_kbps=op_http_parse_content_length(cdr);
+                if(bitrate_kbps>=0&&bitrate_kbps<=OP_INT32_MAX){
+                  _info->bitrate_kbps=(opus_int32)bitrate_kbps;
+                }
+              }
+            }
+            else if(strcmp(header,"icy-pub")==0
+             ||strcmp(header,"ice-public")==0){
+              if(_info->is_public<0&&(cdr[0]=='0'||cdr[0]=='1')&&cdr[1]=='\0'){
+                _info->is_public=cdr[0]-'0';
+              }
+            }
+          }
+        }
+      }
+      switch(status_code[2]){
+        /*200 OK*/
+        case '0':break;
+        /*203 Non-Authoritative Information*/
+        case '3':break;
+        /*204 No Content*/
+        case '4':{
+          if(content_length>=0&&OP_UNLIKELY(content_length!=0)){
+            return OP_FALSE;
+          }
+        }break;
+        /*206 Partial Content*/
+        case '6':{
+          /*No Content-Range header.*/
+          if(OP_UNLIKELY(range_length<0))return OP_FALSE;
+          content_length=range_length;
+          /*The server supports range requests for this resource.
+            We can seek.*/
+          _stream->seekable=1;
+        }break;
+        /*201 Created: the response "SHOULD include an entity containing a list
+           of resource characteristics and location(s)," but not an Opus file.
+          202 Accepted: the response "SHOULD include an indication of request's
+           current status and either a pointer to a status monitor or some
+           estimate of when the user can expect the request to be fulfilled,"
+           but not an Opus file.
+          205 Reset Content: this "MUST NOT include an entity," meaning no Opus
+           file.
+          207...209 are not yet defined, so we don't know how to handle them.*/
+        default:return OP_FALSE;
+      }
+      _stream->content_length=content_length;
+      _stream->pipeline=pipeline_supported&&!pipeline_disabled;
+      /*Pipelining requires HTTP/1.1 persistent connections.*/
+      if(_stream->pipeline)_stream->request.buf[minor_version_pos]='1';
+      _stream->conns[0].pos=0;
+      _stream->conns[0].end_pos=_stream->seekable?content_length:-1;
+      _stream->conns[0].chunk_size=-1;
+      _stream->cur_conni=0;
+      _stream->connect_rate=op_time_diff_ms(&end_time,&start_time);
+      _stream->connect_rate=OP_MAX(_stream->connect_rate,1);
+      if(_info!=NULL)_info->is_ssl=OP_URL_IS_SSL(&_stream->url);
+      /*The URL has been successfully opened.*/
+      return 0;
+    }
+    /*Shouldn't get 1xx; 4xx and 5xx are both failures (and we don't retry).
+      Everything else is undefined.*/
+    else if(status_code[0]!='3')return OP_FALSE;
+    /*We have some form of redirect request.*/
+    /*We only understand 30x codes.*/
+    if(status_code[1]!='0')return OP_FALSE;
+    switch(status_code[2]){
+      /*300 Multiple Choices: "If the server has a preferred choice of
+         representation, it SHOULD include the specific URI for that
+         representation in the Location field," otherwise we'll fail.*/
+      case '0':
+      /*301 Moved Permanently*/
+      case '1':
+      /*302 Found*/
+      case '2':
+      /*307 Temporary Redirect*/
+      case '7':
+      /*308 Permanent Redirect (defined by draft-reschke-http-status-308-07).*/
+      case '8':break;
+      /*305 Use Proxy: "The Location field gives the URI of the proxy."
+        TODO: This shouldn't actually be that hard to do.*/
+      case '5':return OP_EIMPL;
+      /*303 See Other: "The new URI is not a substitute reference for the
+         originally requested resource."
+        304 Not Modified: "The 304 response MUST NOT contain a message-body."
+        306 (Unused)
+        309 is not yet defined, so we don't know how to handle it.*/
+      default:return OP_FALSE;
+    }
+    _url=NULL;
+    for(;;){
+      char *header;
+      char *cdr;
+      ret=op_http_get_next_header(&header,&cdr,&next);
+      if(OP_UNLIKELY(ret<0))return ret;
+      if(header==NULL)break;
+      if(strcmp(header,"location")==0&&OP_LIKELY(_url==NULL))_url=cdr;
+    }
+    if(OP_UNLIKELY(_url==NULL))return OP_FALSE;
+    ret=op_parse_url(&next_url,_url);
+    if(OP_UNLIKELY(ret<0))return ret;
+    if(_proxy_host==NULL||_stream->ssl_session!=NULL){
+      if(strcmp(_stream->url.host,next_url.host)==0
+       &&_stream->url.port==next_url.port){
+        /*Try to skip re-resolve when connecting to the same host.*/
+        addrs=&_stream->addr_info;
+      }
+      else{
+        if(_stream->ssl_session!=NULL){
+          /*Forget any cached SSL session from the last host.*/
+          SSL_SESSION_free(_stream->ssl_session);
+          _stream->ssl_session=NULL;
+        }
+      }
+    }
+    if(_proxy_host==NULL){
+      OP_ASSERT(_stream->connect_host==_stream->url.host);
+      _stream->connect_host=next_url.host;
+      _stream->connect_port=next_url.port;
+    }
+    /*Always try to skip re-resolve for proxy connections.*/
+    else addrs=&_stream->addr_info;
+    op_parsed_url_clear(&_stream->url);
+    *&_stream->url=*&next_url;
+    /*TODO: On servers/proxies that support pipelining, we might be able to
+       re-use this connection.*/
+    op_http_conn_close(_stream,_stream->conns+0,&_stream->lru_head,1);
+  }
+  /*Redirection limit reached.*/
+  return OP_FALSE;
+}
+
+static int op_http_conn_send_request(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,opus_int64 _pos,opus_int32 _chunk_size,
+ int _try_not_to_block){
+  opus_int64 next_end;
+  int        ret;
+  /*We shouldn't have another request outstanding.*/
+  OP_ASSERT(_conn->next_pos<0);
+  /*Build the request to send.*/
+  OP_ASSERT(_stream->request.nbuf>=_stream->request_tail);
+  _stream->request.nbuf=_stream->request_tail;
+  ret=op_sb_append_nonnegative_int64(&_stream->request,_pos);
+  ret|=op_sb_append(&_stream->request,"-",1);
+  if(_chunk_size>0&&OP_ADV_OFFSET(_pos,2*_chunk_size)<_stream->content_length){
+    /*We shouldn't be pipelining requests with non-HTTP/1.1 servers.*/
+    OP_ASSERT(_stream->pipeline);
+    next_end=_pos+_chunk_size;
+    ret|=op_sb_append_nonnegative_int64(&_stream->request,next_end-1);
+    /*Use a larger chunk size for our next request.*/
+    _chunk_size<<=1;
+    /*But after a while, just request the rest of the resource.*/
+    if(_chunk_size>OP_PIPELINE_CHUNK_SIZE_MAX)_chunk_size=-1;
+  }
+  else{
+    /*Either this was a non-pipelined request or we were close enough to the
+       end to just ask for the rest.*/
+    next_end=-1;
+    _chunk_size=-1;
+  }
+  ret|=op_sb_append(&_stream->request,"\r\n\r\n",4);
+  if(OP_UNLIKELY(ret<0))return ret;
+  /*If we don't want to block, check to see if there's enough space in the send
+     queue.
+    There's still a chance we might block, even if there is enough space, but
+     it's a much slimmer one.
+    Blocking at all is pretty unlikely, as we won't have any requests queued
+     when _try_not_to_block is set, so if FIONSPACE isn't available (e.g., on
+     Linux), just skip the test.*/
+  if(_try_not_to_block){
+# if defined(FIONSPACE)
+    int available;
+    ret=ioctl(_conn->fd,FIONSPACE,&available);
+    if(ret<0||available<_stream->request.nbuf)return 1;
+# endif
+  }
+  ret=op_http_conn_write_fully(_conn,
+   _stream->request.buf,_stream->request.nbuf);
+  if(OP_UNLIKELY(ret<0))return ret;
+  _conn->next_pos=_pos;
+  _conn->next_end=next_end;
+  /*Save the chunk size to use for the next request.*/
+  _conn->chunk_size=_chunk_size;
+  _conn->nrequests_left--;
+  return ret;
+}
+
+/*Handles the response to all requests after the first one.
+  Return: 1 if the connection was closed or timed out, 0 on success, or a
+           negative value on any other error.*/
+static int op_http_conn_handle_response(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn){
+  char       *next;
+  char       *status_code;
+  opus_int64  range_length;
+  opus_int64  next_pos;
+  opus_int64  next_end;
+  int         ret;
+  ret=op_http_conn_read_response(_conn,&_stream->response);
+  /*If the server just closed the connection on us, we may have just hit a
+     connection re-use limit, so we might want to retry.*/
+  if(OP_UNLIKELY(ret<0))return ret==OP_EREAD?1:ret;
+  next=op_http_parse_status_line(NULL,&status_code,_stream->response.buf);
+  if(OP_UNLIKELY(next==NULL))return OP_FALSE;
+  /*We _need_ a 206 Partial Content response.
+    Nothing else will do.*/
+  if(strncmp(status_code,"206",3)!=0){
+    /*But on a 408 Request Timeout, we might want to re-try.*/
+    return strncmp(status_code,"408",3)==0?1:OP_FALSE;
+  }
+  next_pos=_conn->next_pos;
+  next_end=_conn->next_end;
+  range_length=-1;
+  for(;;){
+    char *header;
+    char *cdr;
+    ret=op_http_get_next_header(&header,&cdr,&next);
+    if(OP_UNLIKELY(ret<0))return ret;
+    if(header==NULL)break;
+    if(strcmp(header,"content-range")==0){
+      opus_int64 range_first;
+      opus_int64 range_last;
+      /*Two Content-Range headers?*/
+      if(OP_UNLIKELY(range_length>=0))return OP_FALSE;
+      ret=op_http_parse_content_range(&range_first,&range_last,
+       &range_length,cdr);
+      if(OP_UNLIKELY(ret<0))return ret;
+      /*"A response with satus code 206 (Partial Content) MUST NOT
+         include a Content-Range field with a byte-range-resp-spec of
+         '*'."*/
+      if(OP_UNLIKELY(range_first<0)||OP_UNLIKELY(range_last<0))return OP_FALSE;
+      /*We also don't want range_last to overflow.*/
+      if(OP_UNLIKELY(range_last>=OP_INT64_MAX))return OP_FALSE;
+      range_last++;
+      /*Quit if we didn't get the offset we asked for.*/
+      if(range_first!=next_pos)return OP_FALSE;
+      if(next_end<0){
+        /*We asked for the rest of the resource.*/
+        if(range_length>=0){
+          /*Quit if we didn't get it.*/
+          if(OP_UNLIKELY(range_last!=range_length))return OP_FALSE;
+        }
+        /*If there was no length, use the end of the range.*/
+        else range_length=range_last;
+        next_end=range_last;
+      }
+      else{
+        if(range_last!=next_end)return OP_FALSE;
+        /*If there was no length, use the larger of the content length or the
+           end of this chunk.*/
+        if(range_length<0){
+          range_length=OP_MAX(range_last,_stream->content_length);
+        }
+      }
+    }
+    else if(strcmp(header,"content-length")==0){
+      opus_int64 content_length;
+      /*Validate the Content-Length header, if present, against the request we
+         made.*/
+      content_length=op_http_parse_content_length(cdr);
+      if(OP_UNLIKELY(content_length<0))return (int)content_length;
+      if(next_end<0){
+        /*If we haven't seen the Content-Range header yet and we asked for the
+            rest of the resource, set next_end, so we can make sure they match
+            when we do find the Content-Range header.*/
+        if(OP_UNLIKELY(next_pos>OP_INT64_MAX-content_length))return OP_FALSE;
+        next_end=next_pos+content_length;
+      }
+      /*Otherwise, make sure they match now.*/
+      else if(OP_UNLIKELY(next_end-next_pos!=content_length))return OP_FALSE;
+    }
+    else if(strcmp(header,"connection")==0){
+      ret=op_http_parse_connection(cdr);
+      if(OP_UNLIKELY(ret<0))return ret;
+      /*If the server told us it was going to close the connection, don't make
+         any more requests.*/
+      if(OP_UNLIKELY(ret>0))_conn->nrequests_left=0;
+    }
+  }
+  /*No Content-Range header.*/
+  if(OP_UNLIKELY(range_length<0))return OP_FALSE;
+  /*Update the content_length if necessary.*/
+  _stream->content_length=range_length;
+  _conn->pos=next_pos;
+  _conn->end_pos=next_end;
+  _conn->next_pos=-1;
+  return 0;
+}
+
+/*Open a new connection that will start reading at byte offset _pos.
+  _pos:        The byte offset to start reading from.
+  _chunk_size: The number of bytes to ask for in the initial request, or -1 to
+                request the rest of the resource.
+               This may be more bytes than remain, in which case it will be
+                converted into a request for the rest.*/
+static int op_http_conn_open_pos(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,opus_int64 _pos,opus_int32 _chunk_size){
+  struct timeb  start_time;
+  struct timeb  end_time;
+  opus_int32    connect_rate;
+  opus_int32    connect_time;
+  int           ret;
+  ret=op_http_connect(_stream,_conn,&_stream->addr_info,&start_time);
+  if(OP_UNLIKELY(ret<0))return ret;
+  ret=op_http_conn_send_request(_stream,_conn,_pos,_chunk_size,0);
+  if(OP_UNLIKELY(ret<0))return ret;
+  ret=op_http_conn_handle_response(_stream,_conn);
+  if(OP_UNLIKELY(ret!=0))return OP_FALSE;
+  ftime(&end_time);
+  _stream->cur_conni=_conn-_stream->conns;
+  OP_ASSERT(_stream->cur_conni>=0&&_stream->cur_conni<OP_NCONNS_MAX);
+  /*The connection has been successfully opened.
+    Update the connection time estimate.*/
+  connect_time=op_time_diff_ms(&end_time,&start_time);
+  connect_rate=_stream->connect_rate;
+  connect_rate+=OP_MAX(connect_time,1)-connect_rate+8>>4;
+  _stream->connect_rate=connect_rate;
+  return 0;
+}
+
+/*Read data from the current response body.
+  If we're pipelining and we get close to the end of this response, queue
+   another request.
+  If we've reached the end of this response body, parse the next response and
+   keep going.
+  [out] _buf: Returns the data read.
+  _buf_size:  The size of the buffer.
+  Return: A positive number of bytes read on success.
+          0:        The connection was closed.
+          OP_EREAD: There was a fatal read error.*/
+static int op_http_conn_read_body(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,unsigned char *_buf,int _buf_size){
+  opus_int64 pos;
+  opus_int64 end_pos;
+  opus_int64 next_pos;
+  opus_int64 content_length;
+  int        nread;
+  int        pipeline;
+  int        ret;
+  /*Currently this function can only be called on the LRU head.
+    Otherwise, we'd need a _pnext pointer if we needed to close the connection,
+     and re-opening it would re-organize the lists.*/
+  OP_ASSERT(_stream->lru_head==_conn);
+  /*We should have filterd out empty reads by this point.*/
+  OP_ASSERT(_buf_size>0);
+  pos=_conn->pos;
+  end_pos=_conn->end_pos;
+  next_pos=_conn->next_pos;
+  pipeline=_stream->pipeline;
+  content_length=_stream->content_length;
+  if(end_pos>=0){
+    /*Have we reached the end of the current response body?*/
+    if(pos>=end_pos){
+      OP_ASSERT(content_length>=0);
+      /*If this was the end of the stream, we're done.
+        Also return early if a non-blocking read was requested (regardless of
+         whether we might be able to parse the next response without
+         blocking).*/
+      if(content_length<=end_pos)return 0;
+      /*Otherwise, start on the next response.*/
+      if(next_pos<0){
+        /*We haven't issued another request yet.*/
+        if(!pipeline||_conn->nrequests_left<=0){
+          /*There are two ways to get here: either the server told us it was
+             going to close the connection after the last request, or we
+             thought we were reading the whole resource, but it grew while we
+             were reading it.
+            The only way the latter could have happened is if content_length
+             changed while seeking.
+            Open a new request to read the rest.*/
+          OP_ASSERT(_stream->seekable);
+          /*Try to open a new connection to read another chunk.*/
+          op_http_conn_close(_stream,_conn,&_stream->lru_head,1);
+          /*If we're not pipelining, we should be requesting the rest.*/
+          OP_ASSERT(pipeline||_conn->chunk_size==-1);
+          ret=op_http_conn_open_pos(_stream,_conn,end_pos,_conn->chunk_size);
+          if(OP_UNLIKELY(ret<0))return OP_EREAD;
+        }
+        else{
+          /*Issue the request now (better late than never).*/
+          ret=op_http_conn_send_request(_stream,_conn,pos,_conn->chunk_size,0);
+          if(OP_UNLIKELY(ret<0))return OP_EREAD;
+          next_pos=_conn->next_pos;
+          OP_ASSERT(next_pos>=0);
+        }
+      }
+      if(next_pos>=0){
+        /*We shouldn't be trying to read past the current request body if we're
+           seeking somewhere else.*/
+        OP_ASSERT(next_pos==end_pos);
+        ret=op_http_conn_handle_response(_stream,_conn);
+        if(OP_UNLIKELY(ret<0))return OP_EREAD;
+        if(OP_UNLIKELY(ret>0)&&pipeline){
+          opus_int64 next_end;
+          next_end=_conn->next_end;
+          /*Our request timed out or the server closed the connection.
+            Try re-connecting.*/
+          op_http_conn_close(_stream,_conn,&_stream->lru_head,1);
+          /*Unless there's a bug, we should be able to convert
+             (next_pos,next_end) into valid (_pos,_chunk_size) parameters.*/
+          OP_ASSERT(next_end<0
+           ||next_end-next_pos>=0&&next_end-next_pos<=OP_INT32_MAX);
+          ret=op_http_conn_open_pos(_stream,_conn,next_pos,
+           next_end<0?-1:(opus_int32)(next_end-next_pos));
+          if(OP_UNLIKELY(ret<0))return OP_EREAD;
+        }
+        else if(OP_UNLIKELY(ret!=0))return OP_EREAD;
+      }
+      pos=_conn->pos;
+      end_pos=_conn->end_pos;
+      content_length=_stream->content_length;
+    }
+    OP_ASSERT(end_pos>pos);
+    _buf_size=OP_MIN(_buf_size,end_pos-pos);
+  }
+  nread=op_http_conn_read(_conn,(char *)_buf,_buf_size,1);
+  if(OP_UNLIKELY(nread<0))return nread;
+  pos+=nread;
+  _conn->pos=pos;
+  OP_ASSERT(end_pos<0||content_length>=0);
+  /*TODO: If nrequests_left<=0, we can't make a new request, and there will be
+     a big pause after we hit the end of the chunk while we open a new
+     connection.
+    It would be nice to be able to start that process now, but we have no way
+     to do it in the background without blocking (even if we could start it, we
+     have no guarantee the application will return control to us in a
+     sufficiently timely manner to allow us to complete it, and this is
+     uncommon enough that it's not worth using threads just for this).*/
+  if(end_pos>=0&&end_pos<content_length&&next_pos<0
+   &&pipeline&&OP_LIKELY(_conn->nrequests_left>0)){
+    opus_int64 request_thresh;
+    opus_int32 chunk_size;
+    /*Are we getting close to the end of the current response body?
+      If so, we should request more data.*/
+    request_thresh=_stream->connect_rate*_conn->read_rate>>12;
+    /*But don't commit ourselves too quickly.*/
+    chunk_size=_conn->chunk_size;
+    if(chunk_size>=0)request_thresh=OP_MIN(chunk_size>>2,request_thresh);
+    if(end_pos-pos<request_thresh){
+      ret=op_http_conn_send_request(_stream,_conn,end_pos,_conn->chunk_size,1);
+      if(OP_UNLIKELY(ret<0))return OP_EREAD;
+    }
+  }
+  return nread;
+}
+
+static int op_http_stream_read(void *_stream,
+ unsigned char *_ptr,int _buf_size){
+  OpusHTTPStream *stream;
+  ptrdiff_t       nread;
+  opus_int64      size;
+  opus_int64      pos;
+  int             ci;
+  stream=(OpusHTTPStream *)_stream;
+  /*Check for an empty read.*/
+  if(_buf_size<=0)return 0;
+  ci=stream->cur_conni;
+  /*No current connection => EOF.*/
+  if(ci<0)return 0;
+  pos=stream->conns[ci].pos;
+  size=stream->content_length;
+  /*Check for EOF.*/
+  if(size>=0){
+    if(pos>=size)return 0;
+    /*Check for a short read.*/
+    if(_buf_size>size-pos)_buf_size=(int)(size-pos);
+  }
+  nread=op_http_conn_read_body(stream,stream->conns+ci,_ptr,_buf_size);
+  if(OP_UNLIKELY(nread<=0)){
+    /*We hit an error or EOF.
+      Either way, we're done with this connection.*/
+    op_http_conn_close(stream,stream->conns+ci,&stream->lru_head,1);
+    stream->cur_conni=-1;
+    stream->pos=pos;
+  }
+  return nread;
+}
+
+/*Discard data until we reach the _target position.
+  This destroys the contents of _stream->response.buf, as we need somewhere to
+   read this data, and that is a convenient place.
+  _just_read_ahead: Whether or not this is a plain fast-forward.
+                    If 0, we need to issue a new request for a chunk at _target
+                     and discard all the data from our current request(s).
+                    Otherwise, we should be able to reach _target without
+                     issuing any new requests.
+  _target:          The stream position to which to read ahead.*/
+static int op_http_conn_read_ahead(OpusHTTPStream *_stream,
+ OpusHTTPConn *_conn,int _just_read_ahead,opus_int64 _target){
+  opus_int64 pos;
+  opus_int64 end_pos;
+  opus_int64 next_pos;
+  opus_int64 next_end;
+  ptrdiff_t  nread;
+  int        ret;
+  pos=_conn->pos;
+  end_pos=_conn->end_pos;
+  next_pos=_conn->next_pos;
+  next_end=_conn->next_end;
+  if(!_just_read_ahead){
+    /*We need to issue a new pipelined request.
+      This is the only case where we allow more than one outstanding request
+       at a time, so we need to reset next_pos (we'll restore it below if we
+       did have an outstanding request).*/
+    OP_ASSERT(_stream->pipeline);
+    _conn->next_pos=-1;
+    ret=op_http_conn_send_request(_stream,_conn,_target,
+     OP_PIPELINE_CHUNK_SIZE,0);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  /*We can reach the target position by reading forward in the current chunk.*/
+  if(_just_read_ahead&&(end_pos<0||_target<end_pos))end_pos=_target;
+  else if(next_pos>=0){
+    opus_int64 next_next_pos;
+    opus_int64 next_next_end;
+    /*We already have a request outstanding.
+      Finish off the current chunk.*/
+    while(pos<end_pos){
+      nread=op_http_conn_read(_conn,_stream->response.buf,
+       (int)OP_MIN(end_pos-pos,_stream->response.cbuf),1);
+      /*We failed to read ahead.*/
+      if(nread<=0)return OP_FALSE;
+      pos+=nread;
+    }
+    OP_ASSERT(pos==end_pos);
+    if(_just_read_ahead){
+      next_next_pos=next_next_end=-1;
+      end_pos=_target;
+    }
+    else{
+      OP_ASSERT(_conn->next_pos==_target);
+      next_next_pos=_target;
+      next_next_end=_conn->next_end;
+      _conn->next_pos=next_pos;
+      _conn->next_end=next_end;
+      end_pos=next_end;
+    }
+    ret=op_http_conn_handle_response(_stream,_conn);
+    if(OP_UNLIKELY(ret!=0))return OP_FALSE;
+    _conn->next_pos=next_next_pos;
+    _conn->next_end=next_next_end;
+  }
+  while(pos<end_pos){
+    nread=op_http_conn_read(_conn,_stream->response.buf,
+     (int)OP_MIN(end_pos-pos,_stream->response.cbuf),1);
+    /*We failed to read ahead.*/
+    if(nread<=0)return OP_FALSE;
+    pos+=nread;
+  }
+  OP_ASSERT(pos==end_pos);
+  if(!_just_read_ahead){
+    ret=op_http_conn_handle_response(_stream,_conn);
+    if(OP_UNLIKELY(ret!=0))return OP_FALSE;
+  }
+  else _conn->pos=end_pos;
+  OP_ASSERT(_conn->pos==_target);
+  return 0;
+}
+
+static int op_http_stream_seek(void *_stream,opus_int64 _offset,int _whence){
+  struct timeb     seek_time;
+  OpusHTTPStream  *stream;
+  OpusHTTPConn    *conn;
+  OpusHTTPConn   **pnext;
+  OpusHTTPConn    *close_conn;
+  OpusHTTPConn   **close_pnext;
+  opus_int64       content_length;
+  opus_int64       pos;
+  int              pipeline;
+  int              ci;
+  int              ret;
+  stream=(OpusHTTPStream *)_stream;
+  if(!stream->seekable)return -1;
+  content_length=stream->content_length;
+  /*If we're seekable, we should have gotten a Content-Length.*/
+  OP_ASSERT(content_length>=0);
+  ci=stream->cur_conni;
+  pos=ci<0?content_length:stream->conns[ci].pos;
+  switch(_whence){
+    case SEEK_SET:{
+      /*Check for overflow:*/
+      if(_offset<0)return -1;
+      pos=_offset;
+    }break;
+    case SEEK_CUR:{
+      /*Check for overflow:*/
+      if(_offset<-pos||_offset>OP_INT64_MAX-pos)return -1;
+      pos+=_offset;
+    }break;
+    case SEEK_END:{
+      /*Check for overflow:*/
+      if(_offset>content_length||_offset<content_length-OP_INT64_MAX)return -1;
+      pos=content_length-_offset;
+    }break;
+    default:return -1;
+  }
+  /*Mark when we deactivated the active connection.*/
+  if(ci>=0){
+    op_http_conn_read_rate_update(stream->conns+ci);
+    *&seek_time=*&stream->conns[ci].read_time;
+  }
+  else ftime(&seek_time);
+  /*If we seeked past the end of the stream, just disable the active
+     connection.*/
+  if(pos>=content_length){
+    stream->cur_conni=-1;
+    stream->pos=pos;
+    return 0;
+  }
+  /*First try to find a connection we can use without waiting.*/
+  pnext=&stream->lru_head;
+  conn=stream->lru_head;
+  while(conn!=NULL){
+    opus_int64 conn_pos;
+    opus_int64 end_pos;
+    int        available;
+    /*If this connection has been dormant too long or has made too many
+       requests, close it.
+      This is to prevent us from hitting server limits/firewall timeouts.*/
+    if(op_time_diff_ms(&seek_time,&conn->read_time)>
+     OP_CONNECTION_IDLE_TIMEOUT_MS
+     ||conn->nrequests_left<OP_PIPELINE_MIN_REQUESTS){
+      op_http_conn_close(stream,conn,pnext,1);
+      conn=*pnext;
+      continue;
+    }
+    available=op_http_conn_estimate_available(conn);
+    conn_pos=conn->pos;
+    end_pos=conn->end_pos;
+    if(conn->next_pos>=0){
+      OP_ASSERT(end_pos>=0);
+      OP_ASSERT(conn->next_pos==end_pos);
+      end_pos=conn->next_end;
+    }
+    OP_ASSERT(end_pos<0||conn_pos<=end_pos);
+    /*Can we quickly read ahead without issuing a new request or waiting for
+       any more data?
+      If we have an oustanding request, we'll over-estimate the amount of data
+       it has available (because we'll count the response headers, too), but
+       that probably doesn't matter.*/
+    if(conn_pos<=pos&&pos-conn_pos<=available&&(end_pos<0||pos<end_pos)){
+      /*Found a suitable connection to re-use.*/
+      ret=op_http_conn_read_ahead(stream,conn,1,pos);
+      if(OP_UNLIKELY(ret<0)){
+        /*The connection might have become stale, so close it and keep going.*/
+        op_http_conn_close(stream,conn,pnext,1);
+        conn=*pnext;
+        continue;
+      }
+      /*Sucessfully resurrected this connection.*/
+      *pnext=conn->next;
+      conn->next=stream->lru_head;
+      stream->lru_head=conn;
+      stream->cur_conni=conn-stream->conns;
+      return 0;
+    }
+    pnext=&conn->next;
+    conn=conn->next;
+  }
+  /*Chances are that didn't work, so now try to find one we can use by reading
+     ahead a reasonable amount and/or by issuing a new request.*/
+  close_pnext=NULL;
+  close_conn=NULL;
+  pnext=&stream->lru_head;
+  conn=stream->lru_head;
+  pipeline=stream->pipeline;
+  while(conn!=NULL){
+    opus_int64 conn_pos;
+    opus_int64 end_pos;
+    opus_int64 read_ahead_thresh;
+    int        available;
+    int        just_read_ahead;
+    /*Dividing by 2048 instead of 1000 scales this by nearly 1/2, biasing away
+       from connection re-use (and roughly compensating for the lag required to
+       reopen the TCP window of a connection that's been idle).
+      There's no overflow checking here, because it's vanishingly unlikely, and
+       all it would do is cause us to make poor decisions.*/
+    read_ahead_thresh=OP_MAX(OP_READAHEAD_THRESH_MIN,
+     stream->connect_rate*conn->read_rate>>11);
+    available=op_http_conn_estimate_available(conn);
+    conn_pos=conn->pos;
+    end_pos=conn->end_pos;
+    if(conn->next_pos>=0){
+      OP_ASSERT(end_pos>=0);
+      OP_ASSERT(conn->next_pos==end_pos);
+      end_pos=conn->next_end;
+    }
+    OP_ASSERT(end_pos<0||conn_pos<=end_pos);
+    /*Can we quickly read ahead without issuing a new request?*/
+    just_read_ahead=conn_pos<=pos&&pos-conn_pos-available<=read_ahead_thresh
+     &&(end_pos<0||pos<end_pos);
+    if(just_read_ahead||pipeline&&end_pos>=0
+     &&end_pos-conn_pos-available<=read_ahead_thresh){
+      /*Found a suitable connection to re-use.*/
+      ret=op_http_conn_read_ahead(stream,conn,just_read_ahead,pos);
+      if(OP_UNLIKELY(ret<0)){
+        /*The connection might have become stale, so close it and keep going.*/
+        op_http_conn_close(stream,conn,pnext,1);
+        conn=*pnext;
+        continue;
+      }
+      /*Sucessfully resurrected this connection.*/
+      *pnext=conn->next;
+      conn->next=stream->lru_head;
+      stream->lru_head=conn;
+      stream->cur_conni=conn-stream->conns;
+      return 0;
+    }
+    close_pnext=pnext;
+    close_conn=conn;
+    pnext=&conn->next;
+    conn=conn->next;
+  }
+  /*No suitable connections.
+    Open a new one.*/
+  if(stream->free_head==NULL){
+    /*All connections in use.
+      Expire one of them (we should have already picked which one when scanning
+       the list).*/
+    OP_ASSERT(close_conn!=NULL);
+    OP_ASSERT(close_pnext!=NULL);
+    op_http_conn_close(stream,close_conn,close_pnext,1);
+  }
+  OP_ASSERT(stream->free_head!=NULL);
+  conn=stream->free_head;
+  /*If we can pipeline, only request a chunk of data.
+    If we're seeking now, there's a good chance we will want to seek again
+     soon, and this avoids committing this connection to reading the rest of
+     the stream.
+    Particularly with SSL or proxies, issuing a new request on the same
+     connection can be substantially faster than opening a new one.
+    This also limits the amount of data the server will blast at us on this
+     connection if we later seek elsewhere and start reading from a different
+     connection.*/
+  ret=op_http_conn_open_pos(stream,conn,pos,
+   pipeline?OP_PIPELINE_CHUNK_SIZE:-1);
+  if(OP_UNLIKELY(ret<0)){
+    op_http_conn_close(stream,conn,&stream->lru_head,1);
+    return -1;
+  }
+  return 0;
+}
+
+static opus_int64 op_http_stream_tell(void *_stream){
+  OpusHTTPStream *stream;
+  int             ci;
+  stream=(OpusHTTPStream *)_stream;
+  ci=stream->cur_conni;
+  return ci<0?stream->pos:stream->conns[ci].pos;
+}
+
+static int op_http_stream_close(void *_stream){
+  OpusHTTPStream *stream;
+  stream=(OpusHTTPStream *)_stream;
+  if(OP_LIKELY(stream!=NULL)){
+    op_http_stream_clear(stream);
+    _ogg_free(stream);
+  }
+  return 0;
+}
+
+static const OpusFileCallbacks OP_HTTP_CALLBACKS={
+  op_http_stream_read,
+  op_http_stream_seek,
+  op_http_stream_tell,
+  op_http_stream_close
+};
+#endif
+
+void opus_server_info_init(OpusServerInfo *_info){
+  _info->name=NULL;
+  _info->description=NULL;
+  _info->genre=NULL;
+  _info->url=NULL;
+  _info->server=NULL;
+  _info->content_type=NULL;
+  _info->bitrate_kbps=-1;
+  _info->is_public=-1;
+  _info->is_ssl=0;
+}
+
+void opus_server_info_clear(OpusServerInfo *_info){
+  _ogg_free(_info->content_type);
+  _ogg_free(_info->server);
+  _ogg_free(_info->url);
+  _ogg_free(_info->genre);
+  _ogg_free(_info->description);
+  _ogg_free(_info->name);
+}
+
+/*The actual URL stream creation function.
+  This one isn't extensible like the application-level interface, but because
+   it isn't public, we're free to change it in the future.*/
+static void *op_url_stream_create_impl(OpusFileCallbacks *_cb,const char *_url,
+ int _skip_certificate_check,const char *_proxy_host,unsigned _proxy_port,
+ const char *_proxy_user,const char *_proxy_pass,OpusServerInfo *_info){
+  const char *path;
+  /*Check to see if this is a valid file: URL.*/
+  path=op_parse_file_url(_url);
+  if(path!=NULL){
+    char *unescaped_path;
+    void *ret;
+    unescaped_path=op_string_dup(path);
+    if(OP_UNLIKELY(unescaped_path==NULL))return NULL;
+    ret=op_fopen(_cb,op_unescape_url_component(unescaped_path),"rb");
+    _ogg_free(unescaped_path);
+    return ret;
+  }
+#if defined(OP_ENABLE_HTTP)
+  /*If not, try http/https.*/
+  else{
+    OpusHTTPStream *stream;
+    int             ret;
+    stream=(OpusHTTPStream *)_ogg_malloc(sizeof(*stream));
+    if(OP_UNLIKELY(stream==NULL))return NULL;
+    op_http_stream_init(stream);
+    ret=op_http_stream_open(stream,_url,_skip_certificate_check,
+     _proxy_host,_proxy_port,_proxy_user,_proxy_pass,_info);
+    if(OP_UNLIKELY(ret<0)){
+      op_http_stream_clear(stream);
+      _ogg_free(stream);
+      return NULL;
+    }
+    *_cb=*&OP_HTTP_CALLBACKS;
+    return stream;
+  }
+#else
+  (void)_skip_certificate_check;
+  (void)_proxy_host;
+  (void)_proxy_port;
+  (void)_proxy_user;
+  (void)_proxy_pass;
+  (void)_info;
+  return NULL;
+#endif
+}
+
+void *op_url_stream_vcreate(OpusFileCallbacks *_cb,
+ const char *_url,va_list _ap){
+  int             skip_certificate_check;
+  const char     *proxy_host;
+  opus_int32      proxy_port;
+  const char     *proxy_user;
+  const char     *proxy_pass;
+  OpusServerInfo *pinfo;
+  skip_certificate_check=0;
+  proxy_host=NULL;
+  proxy_port=8080;
+  proxy_user=NULL;
+  proxy_pass=NULL;
+  pinfo=NULL;
+  for(;;){
+    ptrdiff_t request;
+    request=va_arg(_ap,char *)-(char *)NULL;
+    /*If we hit NULL, we're done processing options.*/
+    if(!request)break;
+    switch(request){
+      case OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST:{
+        skip_certificate_check=!!va_arg(_ap,opus_int32);
+      }break;
+      case OP_HTTP_PROXY_HOST_REQUEST:{
+        proxy_host=va_arg(_ap,const char *);
+      }break;
+      case OP_HTTP_PROXY_PORT_REQUEST:{
+        proxy_port=va_arg(_ap,opus_int32);
+        if(proxy_port<0||proxy_port>(opus_int32)65535)return NULL;
+      }break;
+      case OP_HTTP_PROXY_USER_REQUEST:{
+        proxy_user=va_arg(_ap,const char *);
+      }break;
+      case OP_HTTP_PROXY_PASS_REQUEST:{
+        proxy_pass=va_arg(_ap,const char *);
+      }break;
+      case OP_GET_SERVER_INFO_REQUEST:{
+        pinfo=va_arg(_ap,OpusServerInfo *);
+      }break;
+      /*Some unknown option.*/
+      default:return NULL;
+    }
+  }
+  /*If the caller has requested server information, proxy it to a local copy to
+     simplify error handling.*/
+  if(pinfo!=NULL){
+    OpusServerInfo  info;
+    void           *ret;
+    opus_server_info_init(&info);
+    ret=op_url_stream_create_impl(_cb,_url,skip_certificate_check,
+     proxy_host,proxy_port,proxy_user,proxy_pass,&info);
+    if(ret!=NULL)*pinfo=*&info;
+    else opus_server_info_clear(&info);
+    return ret;
+  }
+  return op_url_stream_create_impl(_cb,_url,skip_certificate_check,
+   proxy_host,proxy_port,proxy_user,proxy_pass,NULL);
+}
+
+void *op_url_stream_create(OpusFileCallbacks *_cb,
+ const char *_url,...){
+  va_list  ap;
+  void    *ret;
+  va_start(ap,_url);
+  ret=op_url_stream_vcreate(_cb,_url,ap);
+  va_end(ap);
+  return ret;
+}
+
+/*Convenience routines to open/test URLs in a single step.*/
+
+OggOpusFile *op_vopen_url(const char *_url,int *_error,va_list _ap){
+  OpusFileCallbacks  cb;
+  OggOpusFile       *of;
+  void              *source;
+  source=op_url_stream_vcreate(&cb,_url,_ap);
+  if(OP_UNLIKELY(source==NULL)){
+    if(_error!=NULL)*_error=OP_EFAULT;
+    return NULL;
+  }
+  of=op_open_callbacks(source,&cb,NULL,0,_error);
+  if(OP_UNLIKELY(of==NULL))(*cb.close)(source);
+  return of;
+}
+
+OggOpusFile *op_open_url(const char *_url,int *_error,...){
+  OggOpusFile *ret;
+  va_list      ap;
+  va_start(ap,_error);
+  ret=op_vopen_url(_url,_error,ap);
+  va_end(ap);
+  return ret;
+}
+
+OggOpusFile *op_vtest_url(const char *_url,int *_error,va_list _ap){
+  OpusFileCallbacks  cb;
+  OggOpusFile       *of;
+  void              *source;
+  source=op_url_stream_vcreate(&cb,_url,_ap);
+  if(OP_UNLIKELY(source==NULL)){
+    if(_error!=NULL)*_error=OP_EFAULT;
+    return NULL;
+  }
+  of=op_test_callbacks(source,&cb,NULL,0,_error);
+  if(OP_UNLIKELY(of==NULL))(*cb.close)(source);
+  return of;
+}
+
+OggOpusFile *op_test_url(const char *_url,int *_error,...){
+  OggOpusFile *ret;
+  va_list      ap;
+  va_start(ap,_error);
+  ret=op_vtest_url(_url,_error,ap);
+  va_end(ap);
+  return ret;
+}
diff --git a/drivers/opus/info.c b/drivers/opus/info.c
new file mode 100644
index 0000000000..f5ad2110be
--- /dev/null
+++ b/drivers/opus/info.c
@@ -0,0 +1,687 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <limits.h>
+#include <string.h>
+
+static unsigned op_parse_uint16le(const unsigned char *_data){
+  return _data[0]|_data[1]<<8;
+}
+
+static int op_parse_int16le(const unsigned char *_data){
+  int ret;
+  ret=_data[0]|_data[1]<<8;
+  return (ret^0x8000)-0x8000;
+}
+
+static opus_uint32 op_parse_uint32le(const unsigned char *_data){
+  return _data[0]|(opus_uint32)_data[1]<<8|
+   (opus_uint32)_data[2]<<16|(opus_uint32)_data[3]<<24;
+}
+
+static opus_uint32 op_parse_uint32be(const unsigned char *_data){
+  return _data[3]|(opus_uint32)_data[2]<<8|
+   (opus_uint32)_data[1]<<16|(opus_uint32)_data[0]<<24;
+}
+
+int opus_head_parse(OpusHead *_head,const unsigned char *_data,size_t _len){
+  OpusHead head;
+  if(_len<8)return OP_ENOTFORMAT;
+  if(memcmp(_data,"OpusHead",8)!=0)return OP_ENOTFORMAT;
+  if(_len<9)return OP_EBADHEADER;
+  head.version=_data[8];
+  if(head.version>15)return OP_EVERSION;
+  if(_len<19)return OP_EBADHEADER;
+  head.channel_count=_data[9];
+  head.pre_skip=op_parse_uint16le(_data+10);
+  head.input_sample_rate=op_parse_uint32le(_data+12);
+  head.output_gain=op_parse_int16le(_data+16);
+  head.mapping_family=_data[18];
+  if(head.mapping_family==0){
+    if(head.channel_count<1||head.channel_count>2)return OP_EBADHEADER;
+    if(head.version<=1&&_len>19)return OP_EBADHEADER;
+    head.stream_count=1;
+    head.coupled_count=head.channel_count-1;
+    if(_head!=NULL){
+      _head->mapping[0]=0;
+      _head->mapping[1]=1;
+    }
+  }
+  else if(head.mapping_family==1){
+    size_t size;
+    int    ci;
+    if(head.channel_count<1||head.channel_count>8)return OP_EBADHEADER;
+    size=21+head.channel_count;
+    if(_len<size||head.version<=1&&_len>size)return OP_EBADHEADER;
+    head.stream_count=_data[19];
+    if(head.stream_count<1)return OP_EBADHEADER;
+    head.coupled_count=_data[20];
+    if(head.coupled_count>head.stream_count)return OP_EBADHEADER;
+    for(ci=0;ci<head.channel_count;ci++){
+      if(_data[21+ci]>=head.stream_count+head.coupled_count
+       &&_data[21+ci]!=255){
+        return OP_EBADHEADER;
+      }
+    }
+    if(_head!=NULL)memcpy(_head->mapping,_data+21,head.channel_count);
+  }
+  /*General purpose players should not attempt to play back content with
+     channel mapping family 255.*/
+  else if(head.mapping_family==255)return OP_EIMPL;
+  /*No other channel mapping families are currently defined.*/
+  else return OP_EBADHEADER;
+  if(_head!=NULL)memcpy(_head,&head,head.mapping-(unsigned char *)&head);
+  return 0;
+}
+
+void opus_tags_init(OpusTags *_tags){
+  memset(_tags,0,sizeof(*_tags));
+}
+
+void opus_tags_clear(OpusTags *_tags){
+  int ci;
+  for(ci=_tags->comments;ci-->0;)_ogg_free(_tags->user_comments[ci]);
+  _ogg_free(_tags->user_comments);
+  _ogg_free(_tags->comment_lengths);
+  _ogg_free(_tags->vendor);
+}
+
+/*Ensure there's room for up to _ncomments comments.*/
+static int op_tags_ensure_capacity(OpusTags *_tags,size_t _ncomments){
+  char   **user_comments;
+  int     *comment_lengths;
+  size_t   size;
+  if(OP_UNLIKELY(_ncomments>=(size_t)INT_MAX))return OP_EFAULT;
+  size=sizeof(*_tags->comment_lengths)*(_ncomments+1);
+  if(size/sizeof(*_tags->comment_lengths)!=_ncomments+1)return OP_EFAULT;
+  comment_lengths=(int *)_ogg_realloc(_tags->comment_lengths,size);
+  if(OP_UNLIKELY(comment_lengths==NULL))return OP_EFAULT;
+  comment_lengths[_ncomments]=0;
+  _tags->comment_lengths=comment_lengths;
+  size=sizeof(*_tags->user_comments)*(_ncomments+1);
+  if(size/sizeof(*_tags->user_comments)!=_ncomments+1)return OP_EFAULT;
+  user_comments=(char **)_ogg_realloc(_tags->user_comments,size);
+  if(OP_UNLIKELY(user_comments==NULL))return OP_EFAULT;
+  user_comments[_ncomments]=NULL;
+  _tags->user_comments=user_comments;
+  return 0;
+}
+
+/*Duplicate a (possibly non-NUL terminated) string with a known length.*/
+static char *op_strdup_with_len(const char *_s,size_t _len){
+  size_t  size;
+  char   *ret;
+  size=sizeof(*ret)*(_len+1);
+  if(OP_UNLIKELY(size<_len))return NULL;
+  ret=(char *)_ogg_malloc(size);
+  if(OP_LIKELY(ret!=NULL)){
+    ret=(char *)memcpy(ret,_s,sizeof(*ret)*_len);
+    ret[_len]='\0';
+  }
+  return ret;
+}
+
+/*The actual implementation of opus_tags_parse().
+  Unlike the public API, this function requires _tags to already be
+   initialized, modifies its contents before success is guaranteed, and assumes
+   the caller will clear it on error.*/
+static int opus_tags_parse_impl(OpusTags *_tags,
+ const unsigned char *_data,size_t _len){
+  opus_uint32 count;
+  size_t      len;
+  int         ncomments;
+  int         ci;
+  len=_len;
+  if(len<8)return OP_ENOTFORMAT;
+  if(memcmp(_data,"OpusTags",8)!=0)return OP_ENOTFORMAT;
+  if(len<16)return OP_EBADHEADER;
+  _data+=8;
+  len-=8;
+  count=op_parse_uint32le(_data);
+  _data+=4;
+  len-=4;
+  if(count>len)return OP_EBADHEADER;
+  if(_tags!=NULL){
+    _tags->vendor=op_strdup_with_len((char *)_data,count);
+    if(_tags->vendor==NULL)return OP_EFAULT;
+  }
+  _data+=count;
+  len-=count;
+  if(len<4)return OP_EBADHEADER;
+  count=op_parse_uint32le(_data);
+  _data+=4;
+  len-=4;
+  /*Check to make sure there's minimally sufficient data left in the packet.*/
+  if(count>len>>2)return OP_EBADHEADER;
+  /*Check for overflow (the API limits this to an int).*/
+  if(count>(opus_uint32)INT_MAX-1)return OP_EFAULT;
+  if(_tags!=NULL){
+    int ret;
+    ret=op_tags_ensure_capacity(_tags,count);
+    if(ret<0)return ret;
+  }
+  ncomments=(int)count;
+  for(ci=0;ci<ncomments;ci++){
+    /*Check to make sure there's minimally sufficient data left in the packet.*/
+    if((size_t)(ncomments-ci)>len>>2)return OP_EBADHEADER;
+    count=op_parse_uint32le(_data);
+    _data+=4;
+    len-=4;
+    if(count>len)return OP_EBADHEADER;
+    /*Check for overflow (the API limits this to an int).*/
+    if(count>(opus_uint32)INT_MAX)return OP_EFAULT;
+    if(_tags!=NULL){
+      _tags->user_comments[ci]=op_strdup_with_len((char *)_data,count);
+      if(_tags->user_comments[ci]==NULL)return OP_EFAULT;
+      _tags->comment_lengths[ci]=(int)count;
+      _tags->comments=ci+1;
+    }
+    _data+=count;
+    len-=count;
+  }
+  return 0;
+}
+
+int opus_tags_parse(OpusTags *_tags,const unsigned char *_data,size_t _len){
+  if(_tags!=NULL){
+    OpusTags tags;
+    int      ret;
+    opus_tags_init(&tags);
+    ret=opus_tags_parse_impl(&tags,_data,_len);
+    if(ret<0)opus_tags_clear(&tags);
+    else *_tags=*&tags;
+    return ret;
+  }
+  else return opus_tags_parse_impl(NULL,_data,_len);
+}
+
+/*The actual implementation of opus_tags_copy().
+  Unlike the public API, this function requires _dst to already be
+   initialized, modifies its contents before success is guaranteed, and assumes
+   the caller will clear it on error.*/
+static int opus_tags_copy_impl(OpusTags *_dst,const OpusTags *_src){
+  char *vendor;
+  int   ncomments;
+  int   ret;
+  int   ci;
+  vendor=_src->vendor;
+  _dst->vendor=op_strdup_with_len(vendor,strlen(vendor));
+  if(OP_UNLIKELY(_dst->vendor==NULL))return OP_EFAULT;
+  ncomments=_src->comments;
+  ret=op_tags_ensure_capacity(_dst,ncomments);
+  if(OP_UNLIKELY(ret<0))return ret;
+  for(ci=0;ci<ncomments;ci++){
+    int len;
+    len=_src->comment_lengths[ci];
+    OP_ASSERT(len>=0);
+    _dst->user_comments[ci]=op_strdup_with_len(_src->user_comments[ci],len);
+    if(OP_UNLIKELY(_dst->user_comments[ci]==NULL))return OP_EFAULT;
+    _dst->comment_lengths[ci]=len;
+    _dst->comments=ci+1;
+  }
+  return 0;
+}
+
+int opus_tags_copy(OpusTags *_dst,const OpusTags *_src){
+  OpusTags dst;
+  int      ret;
+  opus_tags_init(&dst);
+  ret=opus_tags_copy_impl(&dst,_src);
+  if(OP_UNLIKELY(ret<0))opus_tags_clear(&dst);
+  else *_dst=*&dst;
+  return 0;
+}
+
+int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value){
+  char *comment;
+  int   tag_len;
+  int   value_len;
+  int   ncomments;
+  int   ret;
+  ncomments=_tags->comments;
+  ret=op_tags_ensure_capacity(_tags,ncomments+1);
+  if(OP_UNLIKELY(ret<0))return ret;
+  tag_len=strlen(_tag);
+  value_len=strlen(_value);
+  /*+2 for '=' and '\0'.*/
+  _tags->comment_lengths[ncomments]=0;
+  _tags->user_comments[ncomments]=comment=
+   (char *)_ogg_malloc(sizeof(*comment)*(tag_len+value_len+2));
+  if(OP_UNLIKELY(comment==NULL))return OP_EFAULT;
+  memcpy(comment,_tag,sizeof(*comment)*tag_len);
+  comment[tag_len]='=';
+  memcpy(comment+tag_len+1,_value,sizeof(*comment)*(value_len+1));
+  _tags->comment_lengths[ncomments]=tag_len+value_len+1;
+  _tags->comments=ncomments+1;
+  return 0;
+}
+
+int opus_tags_add_comment(OpusTags *_tags,const char *_comment){
+  int comment_len;
+  int ncomments;
+  int ret;
+  ncomments=_tags->comments;
+  ret=op_tags_ensure_capacity(_tags,ncomments+1);
+  if(OP_UNLIKELY(ret<0))return ret;
+  comment_len=(int)strlen(_comment);
+  _tags->comment_lengths[ncomments]=0;
+  _tags->user_comments[ncomments]=op_strdup_with_len(_comment,comment_len);
+  if(OP_UNLIKELY(_tags->user_comments[ncomments]==NULL))return OP_EFAULT;
+  _tags->comment_lengths[ncomments]=comment_len;
+  _tags->comments=ncomments+1;
+  return 0;
+}
+
+int opus_tagcompare(const char *_tag_name,const char *_comment){
+  return opus_tagncompare(_tag_name,strlen(_tag_name),_comment);
+}
+
+int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){
+  int ret;
+  OP_ASSERT(_tag_len>=0);
+  ret=op_strncasecmp(_tag_name,_comment,_tag_len);
+  return ret?ret:'='-_comment[_tag_len];
+}
+
+const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){
+  char **user_comments;
+  int    tag_len;
+  int    found;
+  int    ncomments;
+  int    ci;
+  tag_len=strlen(_tag);
+  ncomments=_tags->comments;
+  user_comments=_tags->user_comments;
+  found=0;
+  for(ci=0;ci<ncomments;ci++){
+    if(!opus_tagncompare(_tag,tag_len,user_comments[ci])){
+      /*We return a pointer to the data, not a copy.*/
+      if(_count==found++)return user_comments[ci]+tag_len+1;
+    }
+  }
+  /*Didn't find anything.*/
+  return NULL;
+}
+
+int opus_tags_query_count(const OpusTags *_tags,const char *_tag){
+  char **user_comments;
+  int    tag_len;
+  int    found;
+  int    ncomments;
+  int    ci;
+  tag_len=strlen(_tag);
+  ncomments=_tags->comments;
+  user_comments=_tags->user_comments;
+  found=0;
+  for(ci=0;ci<ncomments;ci++){
+    if(!opus_tagncompare(_tag,tag_len,user_comments[ci]))found++;
+  }
+  return found;
+}
+
+int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8){
+  char **comments;
+  int    ncomments;
+  int    ci;
+  comments=_tags->user_comments;
+  ncomments=_tags->comments;
+  /*Look for the first valid R128_TRACK_GAIN tag and use that.*/
+  for(ci=0;ci<ncomments;ci++){
+    if(opus_tagncompare("R128_TRACK_GAIN",15,comments[ci])==0){
+      char       *p;
+      opus_int32  gain_q8;
+      int         negative;
+      p=comments[ci]+16;
+      negative=0;
+      if(*p=='-'){
+        negative=-1;
+        p++;
+      }
+      else if(*p=='+')p++;
+      gain_q8=0;
+      while(*p>='0'&&*p<='9'){
+        gain_q8=10*gain_q8+*p-'0';
+        if(gain_q8>32767-negative)break;
+        p++;
+      }
+      /*This didn't look like a signed 16-bit decimal integer.
+        Not a valid R128_TRACK_GAIN tag.*/
+      if(*p!='\0')continue;
+      *_gain_q8=(int)(gain_q8+negative^negative);
+      return 0;
+    }
+  }
+  return OP_FALSE;
+}
+
+static int op_is_jpeg(const unsigned char *_buf,size_t _buf_sz){
+  return _buf_sz>=11&&memcmp(_buf,"\xFF\xD8\xFF\xE0",4)==0
+   &&(_buf[4]<<8|_buf[5])>=16&&memcmp(_buf+6,"JFIF",5)==0;
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   JPEG.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_jpeg_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  if(op_is_jpeg(_buf,_buf_sz)){
+    size_t offs;
+    offs=2;
+    for(;;){
+      size_t segment_len;
+      int    marker;
+      while(offs<_buf_sz&&_buf[offs]!=0xFF)offs++;
+      while(offs<_buf_sz&&_buf[offs]==0xFF)offs++;
+      marker=_buf[offs];
+      offs++;
+      /*If we hit EOI* (end of image), or another SOI* (start of image),
+         or SOS (start of scan), then stop now.*/
+      if(offs>=_buf_sz||(marker>=0xD8&&marker<=0xDA))break;
+      /*RST* (restart markers): skip (no segment length).*/
+      else if(marker>=0xD0&&marker<=0xD7)continue;
+      /*Read the length of the marker segment.*/
+      if(_buf_sz-offs<2)break;
+      segment_len=_buf[offs]<<8|_buf[offs+1];
+      if(segment_len<2||_buf_sz-offs<segment_len)break;
+      if(marker==0xC0||(marker>0xC0&&marker<0xD0&&(marker&3)!=0)){
+        /*Found a SOFn (start of frame) marker segment:*/
+        if(segment_len>=8){
+          *_height=_buf[offs+3]<<8|_buf[offs+4];
+          *_width=_buf[offs+5]<<8|_buf[offs+6];
+          *_depth=_buf[offs+2]*_buf[offs+7];
+          *_colors=0;
+          *_has_palette=0;
+        }
+        break;
+      }
+      /*Other markers: skip the whole marker segment.*/
+      offs+=segment_len;
+    }
+  }
+}
+
+static int op_is_png(const unsigned char *_buf,size_t _buf_sz){
+  return _buf_sz>=8&&memcmp(_buf,"\x89PNG\x0D\x0A\x1A\x0A",8)==0;
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   PNG.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_png_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  if(op_is_png(_buf,_buf_sz)){
+    size_t offs;
+    offs=8;
+    while(_buf_sz-offs>=12){
+      ogg_uint32_t chunk_len;
+      chunk_len=op_parse_uint32be(_buf+offs);
+      if(chunk_len>_buf_sz-(offs+12))break;
+      else if(chunk_len==13&&memcmp(_buf+offs+4,"IHDR",4)==0){
+        int color_type;
+        *_width=op_parse_uint32be(_buf+offs+8);
+        *_height=op_parse_uint32be(_buf+offs+12);
+        color_type=_buf[offs+17];
+        if(color_type==3){
+          *_depth=24;
+          *_has_palette=1;
+        }
+        else{
+          int sample_depth;
+          sample_depth=_buf[offs+16];
+          if(color_type==0)*_depth=sample_depth;
+          else if(color_type==2)*_depth=sample_depth*3;
+          else if(color_type==4)*_depth=sample_depth*2;
+          else if(color_type==6)*_depth=sample_depth*4;
+          *_colors=0;
+          *_has_palette=0;
+          break;
+        }
+      }
+      else if(*_has_palette>0&&memcmp(_buf+offs+4,"PLTE",4)==0){
+        *_colors=chunk_len/3;
+        break;
+      }
+      offs+=12+chunk_len;
+    }
+  }
+}
+
+static int op_is_gif(const unsigned char *_buf,size_t _buf_sz){
+  return _buf_sz>=6&&(memcmp(_buf,"GIF87a",6)==0||memcmp(_buf,"GIF89a",6)==0);
+}
+
+/*Tries to extract the width, height, bits per pixel, and palette size of a
+   GIF.
+  On failure, simply leaves its outputs unmodified.*/
+static void op_extract_gif_params(const unsigned char *_buf,size_t _buf_sz,
+ opus_uint32 *_width,opus_uint32 *_height,
+ opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){
+  if(op_is_gif(_buf,_buf_sz)&&_buf_sz>=14){
+    *_width=_buf[6]|_buf[7]<<8;
+    *_height=_buf[8]|_buf[9]<<8;
+    /*libFLAC hard-codes the depth to 24.*/
+    *_depth=24;
+    *_colors=1<<((_buf[10]&7)+1);
+    *_has_palette=1;
+  }
+}
+
+/*The actual implementation of opus_picture_tag_parse().
+  Unlike the public API, this function requires _pic to already be
+   initialized, modifies its contents before success is guaranteed, and assumes
+   the caller will clear it on error.*/
+static int opus_picture_tag_parse_impl(OpusPictureTag *_pic,const char *_tag,
+ unsigned char *_buf,size_t _buf_sz,size_t _base64_sz){
+  opus_int32   picture_type;
+  opus_uint32  mime_type_length;
+  char        *mime_type;
+  opus_uint32  description_length;
+  char        *description;
+  opus_uint32  width;
+  opus_uint32  height;
+  opus_uint32  depth;
+  opus_uint32  colors;
+  opus_uint32  data_length;
+  opus_uint32  file_width;
+  opus_uint32  file_height;
+  opus_uint32  file_depth;
+  opus_uint32  file_colors;
+  int          format;
+  int          has_palette;
+  int          colors_set;
+  size_t       i;
+  /*Decode the BASE64 data.*/
+  for(i=0;i<_base64_sz;i++){
+    opus_uint32 value;
+    int         j;
+    value=0;
+    for(j=0;j<4;j++){
+      unsigned c;
+      unsigned d;
+      c=(unsigned char)_tag[4*i+j];
+      if(c=='+')d=62;
+      else if(c=='/')d=63;
+      else if(c>='0'&&c<='9')d=52+c-'0';
+      else if(c>='a'&&c<='z')d=26+c-'a';
+      else if(c>='A'&&c<='Z')d=c-'A';
+      else if(c=='='&&3*i+j>_buf_sz)d=0;
+      else return OP_ENOTFORMAT;
+      value=value<<6|d;
+    }
+    _buf[3*i]=(unsigned char)(value>>16);
+    if(3*i+1<_buf_sz){
+      _buf[3*i+1]=(unsigned char)(value>>8);
+      if(3*i+2<_buf_sz)_buf[3*i+2]=(unsigned char)value;
+    }
+  }
+  i=0;
+  picture_type=op_parse_uint32be(_buf+i);
+  i+=4;
+  /*Extract the MIME type.*/
+  mime_type_length=op_parse_uint32be(_buf+i);
+  i+=4;
+  if(mime_type_length>_buf_sz-32)return OP_ENOTFORMAT;
+  mime_type=(char *)_ogg_malloc(sizeof(*_pic->mime_type)*(mime_type_length+1));
+  if(mime_type==NULL)return OP_EFAULT;
+  memcpy(mime_type,_buf+i,sizeof(*mime_type)*mime_type_length);
+  mime_type[mime_type_length]='\0';
+  _pic->mime_type=mime_type;
+  i+=mime_type_length;
+  /*Extract the description string.*/
+  description_length=op_parse_uint32be(_buf+i);
+  i+=4;
+  if(description_length>_buf_sz-mime_type_length-32)return OP_ENOTFORMAT;
+  description=
+   (char *)_ogg_malloc(sizeof(*_pic->mime_type)*(description_length+1));
+  if(description==NULL)return OP_EFAULT;
+  memcpy(description,_buf+i,sizeof(*description)*description_length);
+  description[description_length]='\0';
+  _pic->description=description;
+  i+=description_length;
+  /*Extract the remaining fields.*/
+  width=op_parse_uint32be(_buf+i);
+  i+=4;
+  height=op_parse_uint32be(_buf+i);
+  i+=4;
+  depth=op_parse_uint32be(_buf+i);
+  i+=4;
+  colors=op_parse_uint32be(_buf+i);
+  i+=4;
+  /*If one of these is set, they all must be, but colors==0 is a valid value.*/
+  colors_set=width!=0||height!=0||depth!=0||colors!=0;
+  if((width==0||height==0||depth==0)&&colors_set)return OP_ENOTFORMAT;
+  data_length=op_parse_uint32be(_buf+i);
+  i+=4;
+  if(data_length>_buf_sz-i)return OP_ENOTFORMAT;
+  /*Trim extraneous data so we don't copy it below.*/
+  _buf_sz=i+data_length;
+  /*Attempt to determine the image format.*/
+  format=OP_PIC_FORMAT_UNKNOWN;
+  if(mime_type_length==3&&strcmp(mime_type,"-->")==0){
+    format=OP_PIC_FORMAT_URL;
+    /*Picture type 1 must be a 32x32 PNG.*/
+    if(picture_type==1&&(width!=0||height!=0)&&(width!=32||height!=32)){
+      return OP_ENOTFORMAT;
+    }
+    /*Append a terminating NUL for the convenience of our callers.*/
+    _buf[_buf_sz++]='\0';
+  }
+  else{
+    if(mime_type_length==10
+     &&op_strncasecmp(mime_type,"image/jpeg",mime_type_length)==0){
+      if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG;
+    }
+    else if(mime_type_length==9
+     &&op_strncasecmp(mime_type,"image/png",mime_type_length)==0){
+      if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG;
+    }
+    else if(mime_type_length==9
+     &&op_strncasecmp(mime_type,"image/gif",mime_type_length)==0){
+      if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF;
+    }
+    else if(mime_type_length==0||(mime_type_length==6
+     &&op_strncasecmp(mime_type,"image/",mime_type_length)==0)){
+      if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG;
+      else if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG;
+      else if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF;
+    }
+    file_width=file_height=file_depth=file_colors=0;
+    has_palette=-1;
+    switch(format){
+      case OP_PIC_FORMAT_JPEG:{
+        op_extract_jpeg_params(_buf+i,data_length,
+         &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+      }break;
+      case OP_PIC_FORMAT_PNG:{
+        op_extract_png_params(_buf+i,data_length,
+         &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+      }break;
+      case OP_PIC_FORMAT_GIF:{
+        op_extract_gif_params(_buf+i,data_length,
+         &file_width,&file_height,&file_depth,&file_colors,&has_palette);
+      }break;
+    }
+    if(has_palette>=0){
+      /*If we successfully extracted these parameters from the image, override
+         any declared values.*/
+      width=file_width;
+      height=file_height;
+      depth=file_depth;
+      colors=file_colors;
+    }
+    /*Picture type 1 must be a 32x32 PNG.*/
+    if(picture_type==1&&(format!=OP_PIC_FORMAT_PNG||width!=32||height!=32)){
+      return OP_ENOTFORMAT;
+    }
+  }
+  /*Adjust _buf_sz instead of using data_length to capture the terminating NUL
+     for URLs.*/
+  _buf_sz-=i;
+  memmove(_buf,_buf+i,sizeof(*_buf)*_buf_sz);
+  _buf=(unsigned char *)_ogg_realloc(_buf,_buf_sz);
+  if(_buf_sz>0&&_buf==NULL)return OP_EFAULT;
+  _pic->type=picture_type;
+  _pic->width=width;
+  _pic->height=height;
+  _pic->depth=depth;
+  _pic->colors=colors;
+  _pic->data_length=data_length;
+  _pic->data=_buf;
+  _pic->format=format;
+  return 0;
+}
+
+int opus_picture_tag_parse(OpusPictureTag *_pic,const char *_tag){
+  OpusPictureTag  pic;
+  unsigned char  *buf;
+  size_t          base64_sz;
+  size_t          buf_sz;
+  size_t          tag_length;
+  int             ret;
+  if(opus_tagncompare("METADATA_BLOCK_PICTURE",22,_tag)==0)_tag+=23;
+  /*Figure out how much BASE64-encoded data we have.*/
+  tag_length=strlen(_tag);
+  if(tag_length&3)return OP_ENOTFORMAT;
+  base64_sz=tag_length>>2;
+  buf_sz=3*base64_sz;
+  if(buf_sz<32)return OP_ENOTFORMAT;
+  if(_tag[tag_length-1]=='=')buf_sz--;
+  if(_tag[tag_length-2]=='=')buf_sz--;
+  if(buf_sz<32)return OP_ENOTFORMAT;
+  /*Allocate an extra byte to allow appending a terminating NUL to URL data.*/
+  buf=(unsigned char *)_ogg_malloc(sizeof(*buf)*(buf_sz+1));
+  if(buf==NULL)return OP_EFAULT;
+  opus_picture_tag_init(&pic);
+  ret=opus_picture_tag_parse_impl(&pic,_tag,buf,buf_sz,base64_sz);
+  if(ret<0){
+    opus_picture_tag_clear(&pic);
+    _ogg_free(buf);
+  }
+  else *_pic=*&pic;
+  return ret;
+}
+
+void opus_picture_tag_init(OpusPictureTag *_pic){
+  memset(_pic,0,sizeof(*_pic));
+}
+
+void opus_picture_tag_clear(OpusPictureTag *_pic){
+  _ogg_free(_pic->description);
+  _ogg_free(_pic->mime_type);
+  _ogg_free(_pic->data);
+}
diff --git a/drivers/opus/internal.c b/drivers/opus/internal.c
new file mode 100644
index 0000000000..a9c3671179
--- /dev/null
+++ b/drivers/opus/internal.c
@@ -0,0 +1,42 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+
+#if defined(OP_ENABLE_ASSERTIONS)
+void op_fatal_impl(const char *_str,const char *_file,int _line){
+  fprintf(stderr,"Fatal (internal) error in %s, line %i: %s\n",
+   _file,_line,_str);
+  abort();
+}
+#endif
+
+/*A version of strncasecmp() that is guaranteed to only ignore the case of
+   ASCII characters.*/
+int op_strncasecmp(const char *_a,const char *_b,int _n){
+  int i;
+  for(i=0;i<_n;i++){
+    int a;
+    int b;
+    int d;
+    a=_a[i];
+    b=_b[i];
+    if(a>='a'&&a<='z')a-='a'-'A';
+    if(b>='a'&&b<='z')b-='a'-'A';
+    d=a-b;
+    if(d)return d;
+  }
+  return 0;
+}
diff --git a/drivers/opus/internal.h b/drivers/opus/internal.h
new file mode 100644
index 0000000000..cb4089fd4d
--- /dev/null
+++ b/drivers/opus/internal.h
@@ -0,0 +1,249 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#if !defined(_opusfile_internal_h)
+# define _opusfile_internal_h (1)
+
+# if !defined(_REENTRANT)
+#  define _REENTRANT
+# endif
+# if !defined(_GNU_SOURCE)
+#  define _GNU_SOURCE
+# endif
+# if !defined(_LARGEFILE_SOURCE)
+#  define _LARGEFILE_SOURCE
+# endif
+# if !defined(_LARGEFILE64_SOURCE)
+#  define _LARGEFILE64_SOURCE
+# endif
+# if !defined(_FILE_OFFSET_BITS)
+#  define _FILE_OFFSET_BITS 64
+# endif
+
+# include <stdlib.h>
+# include <opus/opusfile.h>
+
+typedef struct OggOpusLink OggOpusLink;
+
+# if defined(OPUS_FIXED_POINT)
+
+typedef opus_int16 op_sample;
+
+# else
+
+typedef float      op_sample;
+
+/*We're using this define to test for libopus 1.1 or later until libopus
+   provides a better mechanism.*/
+#  if defined(OPUS_GET_EXPERT_FRAME_DURATION_REQUEST)
+/*Enable soft clipping prevention in 16-bit decodes.*/
+#   define OP_SOFT_CLIP (1)
+#  endif
+
+# endif
+
+# if OP_GNUC_PREREQ(4,2)
+/*Disable excessive warnings about the order of operations.*/
+#  pragma GCC diagnostic ignored "-Wparentheses"
+# elif defined(_MSC_VER)
+/*Disable excessive warnings about the order of operations.*/
+#  pragma warning(disable:4554)
+/*Disable warnings about "deprecated" POSIX functions.*/
+#  pragma warning(disable:4996)
+# endif
+
+# if OP_GNUC_PREREQ(3,0)
+/*Another alternative is
+    (__builtin_constant_p(_x)?!!(_x):__builtin_expect(!!(_x),1))
+   but that evaluates _x multiple times, which may be bad.*/
+#  define OP_LIKELY(_x) (__builtin_expect(!!(_x),1))
+#  define OP_UNLIKELY(_x) (__builtin_expect(!!(_x),0))
+# else
+#  define OP_LIKELY(_x)   (!!(_x))
+#  define OP_UNLIKELY(_x) (!!(_x))
+# endif
+
+# if defined(OP_ENABLE_ASSERTIONS)
+#  if OP_GNUC_PREREQ(2,5)||__SUNPRO_C>=0x590
+__attribute__((noreturn))
+#  endif
+void op_fatal_impl(const char *_str,const char *_file,int _line);
+
+#  define OP_FATAL(_str) (op_fatal_impl(_str,__FILE__,__LINE__))
+
+#  define OP_ASSERT(_cond) \
+  do{ \
+    if(OP_UNLIKELY(!(_cond)))OP_FATAL("assertion failed: " #_cond); \
+  } \
+  while(0)
+#  define OP_ALWAYS_TRUE(_cond) OP_ASSERT(_cond)
+
+# else
+#  define OP_FATAL(_str) abort()
+#  define OP_ASSERT(_cond)
+#  define OP_ALWAYS_TRUE(_cond) ((void)(_cond))
+# endif
+
+# define OP_INT64_MAX (2*(((ogg_int64_t)1<<62)-1)|1)
+# define OP_INT64_MIN (-OP_INT64_MAX-1)
+# define OP_INT32_MAX (2*(((ogg_int32_t)1<<30)-1)|1)
+# define OP_INT32_MIN (-OP_INT32_MAX-1)
+
+# define OP_MIN(_a,_b)        ((_a)<(_b)?(_a):(_b))
+# define OP_MAX(_a,_b)        ((_a)>(_b)?(_a):(_b))
+# define OP_CLAMP(_lo,_x,_hi) (OP_MAX(_lo,OP_MIN(_x,_hi)))
+
+/*Advance a file offset by the given amount, clamping against OP_INT64_MAX.
+  This is used to advance a known offset by things like OP_CHUNK_SIZE or
+   OP_PAGE_SIZE_MAX, while making sure to avoid signed overflow.
+  It assumes that both _offset and _amount are non-negative.*/
+#define OP_ADV_OFFSET(_offset,_amount) \
+ (OP_MIN(_offset,OP_INT64_MAX-(_amount))+(_amount))
+
+/*The maximum channel count for any mapping we'll actually decode.*/
+# define OP_NCHANNELS_MAX (8)
+
+/*Initial state.*/
+# define  OP_NOTOPEN   (0)
+/*We've found the first Opus stream in the first link.*/
+# define  OP_PARTOPEN  (1)
+# define  OP_OPENED    (2)
+/*We've found the first Opus stream in the current link.*/
+# define  OP_STREAMSET (3)
+/*We've initialized the decoder for the chosen Opus stream in the current
+   link.*/
+# define  OP_INITSET   (4)
+
+/*Information cached for a single link in a chained Ogg Opus file.
+  We choose the first Opus stream encountered in each link to play back (and
+   require at least one).*/
+struct OggOpusLink{
+  /*The byte offset of the first header page in this link.*/
+  opus_int64   offset;
+  /*The byte offset of the first data page from the chosen Opus stream in this
+     link (after the headers).*/
+  opus_int64   data_offset;
+  /*The byte offset of the last page from the chosen Opus stream in this link.
+    This is used when seeking to ensure we find a page before the last one, so
+     that end-trimming calculations work properly.
+    This is only valid for seekable sources.*/
+  opus_int64   end_offset;
+  /*The granule position of the last sample.
+    This is only valid for seekable sources.*/
+  ogg_int64_t  pcm_end;
+  /*The granule position before the first sample.*/
+  ogg_int64_t  pcm_start;
+  /*The serial number.*/
+  ogg_uint32_t serialno;
+  /*The contents of the info header.*/
+  OpusHead     head;
+  /*The contents of the comment header.*/
+  OpusTags     tags;
+};
+
+struct OggOpusFile{
+  /*The callbacks used to access the data source.*/
+  OpusFileCallbacks  callbacks;
+  /*A FILE *, memory bufer, etc.*/
+  void              *source;
+  /*Whether or not we can seek with this data source.*/
+  int                seekable;
+  /*The number of links in this chained Ogg Opus file.*/
+  int                nlinks;
+  /*The cached information from each link in a chained Ogg Opus file.
+    If source isn't seekable (e.g., it's a pipe), only the current link
+     appears.*/
+  OggOpusLink       *links;
+  /*The number of serial numbers from a single link.*/
+  int                nserialnos;
+  /*The capacity of the list of serial numbers from a single link.*/
+  int                cserialnos;
+  /*Storage for the list of serial numbers from a single link.*/
+  ogg_uint32_t      *serialnos;
+  /*This is the current offset of the data processed by the ogg_sync_state.
+    After a seek, this should be set to the target offset so that we can track
+     the byte offsets of subsequent pages.
+    After a call to op_get_next_page(), this will point to the first byte after
+     that page.*/
+  opus_int64         offset;
+  /*The total size of this data source, or -1 if it's unseekable.*/
+  opus_int64         end;
+  /*Used to locate pages in the data source.*/
+  ogg_sync_state     oy;
+  /*One of OP_NOTOPEN, OP_PARTOPEN, OP_OPENED, OP_STREAMSET, OP_INITSET.*/
+  int                ready_state;
+  /*The current link being played back.*/
+  int                cur_link;
+  /*The number of decoded samples to discard from the start of decoding.*/
+  opus_int32         cur_discard_count;
+  /*The granule position of the previous packet (current packet start time).*/
+  ogg_int64_t        prev_packet_gp;
+  /*The number of bytes read since the last bitrate query, including framing.*/
+  opus_int64         bytes_tracked;
+  /*The number of samples decoded since the last bitrate query.*/
+  ogg_int64_t        samples_tracked;
+  /*Takes physical pages and welds them into a logical stream of packets.*/
+  ogg_stream_state   os;
+  /*Re-timestamped packets from a single page.
+    Buffering these relies on the undocumented libogg behavior that ogg_packet
+     pointers remain valid until the next page is submitted to the
+     ogg_stream_state they came from.*/
+  ogg_packet         op[255];
+  /*The index of the next packet to return.*/
+  int                op_pos;
+  /*The total number of packets available.*/
+  int                op_count;
+  /*Central working state for the packet-to-PCM decoder.*/
+  OpusMSDecoder     *od;
+  /*The application-provided packet decode callback.*/
+  op_decode_cb_func  decode_cb;
+  /*The application-provided packet decode callback context.*/
+  void              *decode_cb_ctx;
+  /*The stream count used to initialize the decoder.*/
+  int                od_stream_count;
+  /*The coupled stream count used to initialize the decoder.*/
+  int                od_coupled_count;
+  /*The channel count used to initialize the decoder.*/
+  int                od_channel_count;
+  /*The channel mapping used to initialize the decoder.*/
+  unsigned char      od_mapping[OP_NCHANNELS_MAX];
+  /*The buffered data for one decoded packet.*/
+  op_sample         *od_buffer;
+  /*The current position in the decoded buffer.*/
+  int                od_buffer_pos;
+  /*The number of valid samples in the decoded buffer.*/
+  int                od_buffer_size;
+  /*The type of gain offset to apply.
+    One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/
+  int                gain_type;
+  /*The offset to apply to the gain.*/
+  opus_int32         gain_offset_q8;
+  /*Internal state for soft clipping and dithering float->short output.*/
+#if !defined(OPUS_FIXED_POINT)
+# if defined(OP_SOFT_CLIP)
+  float              clip_state[OP_NCHANNELS_MAX];
+# endif
+  float              dither_a[OP_NCHANNELS_MAX*4];
+  float              dither_b[OP_NCHANNELS_MAX*4];
+  opus_uint32        dither_seed;
+  int                dither_mute;
+  int                dither_disabled;
+  /*The number of channels represented by the internal state.
+    This gets set to 0 whenever anything that would prevent state propagation
+     occurs (switching between the float/short APIs, or between the
+     stereo/multistream APIs).*/
+  int                state_channel_count;
+#endif
+};
+
+int op_strncasecmp(const char *_a,const char *_b,int _n);
+
+#endif
diff --git a/drivers/opus/mlp.c b/drivers/opus/mlp.c
new file mode 100644
index 0000000000..7220a23d42
--- /dev/null
+++ b/drivers/opus/mlp.c
@@ -0,0 +1,140 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#include <math.h>
+#include "mlp.h"
+#include "arch.h"
+#include "tansig_table.h"
+#define MAX_NEURONS 100
+
+#if 0
+static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
+{
+	int i;
+	opus_val16 xx; /* Q11 */
+	/*double x, y;*/
+	opus_val16 dy, yy; /* Q14 */
+	/*x = 1.9073e-06*_x;*/
+	if (_x>=QCONST32(8,19))
+		return QCONST32(1.,14);
+	if (_x<=-QCONST32(8,19))
+		return -QCONST32(1.,14);
+	xx = EXTRACT16(SHR32(_x, 8));
+	/*i = lrint(25*x);*/
+	i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
+	/*x -= .04*i;*/
+	xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
+	/*x = xx*(1./2048);*/
+	/*y = tansig_table[250+i];*/
+	yy = tansig_table[250+i];
+	/*y = yy*(1./16384);*/
+	dy = 16384-MULT16_16_Q14(yy,yy);
+	yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
+	return yy;
+}
+#else
+/*extern const float tansig_table[501];*/
+static OPUS_INLINE float tansig_approx(float x)
+{
+	int i;
+	float y, dy;
+	float sign=1;
+	/* Tests are reversed to catch NaNs */
+    if (!(x<8))
+        return 1;
+    if (!(x>-8))
+        return -1;
+	if (x<0)
+	{
+	   x=-x;
+	   sign=-1;
+	}
+	i = (int)floor(.5f+25*x);
+	x -= .04f*i;
+	y = tansig_table[i];
+	dy = 1-y*y;
+	y = y + x*dy*(1 - y*x);
+	return sign*y;
+}
+#endif
+
+#if 0
+void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+{
+	int j;
+	opus_val16 hidden[MAX_NEURONS];
+	const opus_val16 *W = m->weights;
+	/* Copy to tmp_in */
+	for (j=0;j<m->topo[1];j++)
+	{
+		int k;
+		opus_val32 sum = SHL32(EXTEND32(*W++),8);
+		for (k=0;k<m->topo[0];k++)
+			sum = MAC16_16(sum, in[k],*W++);
+		hidden[j] = tansig_approx(sum);
+	}
+	for (j=0;j<m->topo[2];j++)
+	{
+		int k;
+		opus_val32 sum = SHL32(EXTEND32(*W++),14);
+		for (k=0;k<m->topo[1];k++)
+			sum = MAC16_16(sum, hidden[k], *W++);
+		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
+	}
+}
+#else
+void mlp_process(const MLP *m, const float *in, float *out)
+{
+    int j;
+    float hidden[MAX_NEURONS];
+    const float *W = m->weights;
+    /* Copy to tmp_in */
+    for (j=0;j<m->topo[1];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[0];k++)
+            sum = sum + in[k]**W++;
+        hidden[j] = tansig_approx(sum);
+    }
+    for (j=0;j<m->topo[2];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[1];k++)
+            sum = sum + hidden[k]**W++;
+        out[j] = tansig_approx(sum);
+    }
+}
+#endif
diff --git a/drivers/opus/mlp.h b/drivers/opus/mlp.h
new file mode 100644
index 0000000000..86c8e0617d
--- /dev/null
+++ b/drivers/opus/mlp.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _MLP_H_
+#define _MLP_H_
+
+#include "arch.h"
+
+typedef struct {
+	int layers;
+	const int *topo;
+	const float *weights;
+} MLP;
+
+void mlp_process(const MLP *m, const float *in, float *out);
+
+#endif /* _MLP_H_ */
diff --git a/drivers/opus/mlp_data.c b/drivers/opus/mlp_data.c
new file mode 100644
index 0000000000..401c4c0250
--- /dev/null
+++ b/drivers/opus/mlp_data.c
@@ -0,0 +1,105 @@
+/* The contents of this file was automatically generated by mlp_train.c
+   It contains multi-layer perceptron (MLP) weights. */
+
+#include "mlp.h"
+
+/* RMS error was 0.138320, seed was 1361535663 */
+
+static const float weights[422] = {
+
+/* hidden layer */
+-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,
+-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,
+-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,
+0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,
+0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,
+24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,
+-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,
+-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,
+-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,
+1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,
+15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,
+0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,
+-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,
+0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,
+0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,
+-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,
+-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,
+-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,
+0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,
+-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,
+2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,
+0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,
+-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,
+0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,
+0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,
+-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,
+5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,
+-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,
+-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,
+-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,
+1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,
+-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,
+-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,
+0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,
+0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,
+-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,
+10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,
+-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,
+-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,
+-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,
+0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,
+-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,
+0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,
+0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,
+-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,
+0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,
+-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,
+-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,
+-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,
+-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,
+-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,
+5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,
+1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,
+0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,
+-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,
+0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,
+-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,
+-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,
+0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,
+-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,
+-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,
+0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,
+-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,
+0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,
+-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,
+-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,
+0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,
+-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,
+0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,
+-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,
+0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,
+-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,
+4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,
+0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,
+-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,
+0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,
+0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,
+3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,
+
+/* output layer */
+-0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f,
+0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f,
+0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f,
+0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f,
+4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f,
+-1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f,
+3.87308f, 3.52558f};
+
+static const int topo[3] = {25, 15, 2};
+
+const MLP net = {
+    3,
+    topo,
+    weights
+};
diff --git a/drivers/opus/opus.c b/drivers/opus/opus.c
new file mode 100644
index 0000000000..8978e3b06b
--- /dev/null
+++ b/drivers/opus/opus.c
@@ -0,0 +1,329 @@
+/* Copyright (c) 2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus.h"
+#include "opus_private.h"
+
+#ifndef DISABLE_FLOAT_API
+OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem)
+{
+   int c;
+   int i;
+   float *x;
+
+   if (C<1 || N<1 || !_x || !declip_mem) return;
+
+   /* First thing: saturate everything to +/- 2 which is the highest level our
+      non-linearity can handle. At the point where the signal reaches +/-2,
+      the derivative will be zero anyway, so this doesn't introduce any
+      discontinuity in the derivative. */
+   for (i=0;i<N*C;i++)
+      _x[i] = MAX16(-2.f, MIN16(2.f, _x[i]));
+   for (c=0;c<C;c++)
+   {
+      float a;
+      float x0;
+      int curr;
+
+      x = _x+c;
+      a = declip_mem[c];
+      /* Continue applying the non-linearity from the previous frame to avoid
+         any discontinuity. */
+      for (i=0;i<N;i++)
+      {
+         if (x[i*C]*a>=0)
+            break;
+         x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
+      }
+
+      curr=0;
+      x0 = x[0];
+      while(1)
+      {
+         int start, end;
+         float maxval;
+         int special=0;
+         int peak_pos;
+         for (i=curr;i<N;i++)
+         {
+            if (x[i*C]>1 || x[i*C]<-1)
+               break;
+         }
+         if (i==N)
+         {
+            a=0;
+            break;
+         }
+         peak_pos = i;
+         start=end=i;
+         maxval=ABS16(x[i*C]);
+         /* Look for first zero crossing before clipping */
+         while (start>0 && x[i*C]*x[(start-1)*C]>=0)
+            start--;
+         /* Look for first zero crossing after clipping */
+         while (end<N && x[i*C]*x[end*C]>=0)
+         {
+            /* Look for other peaks until the next zero-crossing. */
+            if (ABS16(x[end*C])>maxval)
+            {
+               maxval = ABS16(x[end*C]);
+               peak_pos = end;
+            }
+            end++;
+         }
+         /* Detect the special case where we clip before the first zero crossing */
+         special = (start==0 && x[i*C]*x[0]>=0);
+
+         /* Compute a such that maxval + a*maxval^2 = 1 */
+         a=(maxval-1)/(maxval*maxval);
+         if (x[i*C]>0)
+            a = -a;
+         /* Apply soft clipping */
+         for (i=start;i<end;i++)
+            x[i*C] = x[i*C]+a*x[i*C]*x[i*C];
+
+         if (special && peak_pos>=2)
+         {
+            /* Add a linear ramp from the first sample to the signal peak.
+               This avoids a discontinuity at the beginning of the frame. */
+            float delta;
+            float offset = x0-x[0];
+            delta = offset / peak_pos;
+            for (i=curr;i<peak_pos;i++)
+            {
+               offset -= delta;
+               x[i*C] += offset;
+               x[i*C] = MAX16(-1.f, MIN16(1.f, x[i*C]));
+            }
+         }
+         curr = end;
+         if (curr==N)
+            break;
+      }
+      declip_mem[c] = a;
+   }
+}
+#endif
+
+int encode_size(int size, unsigned char *data)
+{
+   if (size < 252)
+   {
+      data[0] = size;
+      return 1;
+   } else {
+      data[0] = 252+(size&0x3);
+      data[1] = (size-(int)data[0])>>2;
+      return 2;
+   }
+}
+
+static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size)
+{
+   if (len<1)
+   {
+      *size = -1;
+      return -1;
+   } else if (data[0]<252)
+   {
+      *size = data[0];
+      return 1;
+   } else if (len<2)
+   {
+      *size = -1;
+      return -1;
+   } else {
+      *size = 4*data[1] + data[0];
+      return 2;
+   }
+}
+
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+      int self_delimited, unsigned char *out_toc,
+      const unsigned char *frames[48], opus_int16 size[48],
+      int *payload_offset, opus_int32 *packet_offset)
+{
+   int i, bytes;
+   int count;
+   int cbr;
+   unsigned char ch, toc;
+   int framesize;
+   opus_int32 last_size;
+   opus_int32 pad = 0;
+   const unsigned char *data0 = data;
+
+   if (size==NULL)
+      return OPUS_BAD_ARG;
+
+   framesize = opus_packet_get_samples_per_frame(data, 48000);
+
+   cbr = 0;
+   toc = *data++;
+   len--;
+   last_size = len;
+   switch (toc&0x3)
+   {
+   /* One frame */
+   case 0:
+      count=1;
+      break;
+   /* Two CBR frames */
+   case 1:
+      count=2;
+      cbr = 1;
+      if (!self_delimited)
+      {
+         if (len&0x1)
+            return OPUS_INVALID_PACKET;
+         last_size = len/2;
+         /* If last_size doesn't fit in size[0], we'll catch it later */
+         size[0] = (opus_int16)last_size;
+      }
+      break;
+   /* Two VBR frames */
+   case 2:
+      count = 2;
+      bytes = parse_size(data, len, size);
+      len -= bytes;
+      if (size[0]<0 || size[0] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      last_size = len-size[0];
+      break;
+   /* Multiple CBR/VBR frames (from 0 to 120 ms) */
+   default: /*case 3:*/
+      if (len<1)
+         return OPUS_INVALID_PACKET;
+      /* Number of frames encoded in bits 0 to 5 */
+      ch = *data++;
+      count = ch&0x3F;
+      if (count <= 0 || framesize*count > 5760)
+         return OPUS_INVALID_PACKET;
+      len--;
+      /* Padding flag is bit 6 */
+      if (ch&0x40)
+      {
+         int p;
+         do {
+            int tmp;
+            if (len<=0)
+               return OPUS_INVALID_PACKET;
+            p = *data++;
+            len--;
+            tmp = p==255 ? 254: p;
+            len -= tmp;
+            pad += tmp;
+         } while (p==255);
+      }
+      if (len<0)
+         return OPUS_INVALID_PACKET;
+      /* VBR flag is bit 7 */
+      cbr = !(ch&0x80);
+      if (!cbr)
+      {
+         /* VBR case */
+         last_size = len;
+         for (i=0;i<count-1;i++)
+         {
+            bytes = parse_size(data, len, size+i);
+            len -= bytes;
+            if (size[i]<0 || size[i] > len)
+               return OPUS_INVALID_PACKET;
+            data += bytes;
+            last_size -= bytes+size[i];
+         }
+         if (last_size<0)
+            return OPUS_INVALID_PACKET;
+      } else if (!self_delimited)
+      {
+         /* CBR case */
+         last_size = len/count;
+         if (last_size*count!=len)
+            return OPUS_INVALID_PACKET;
+         for (i=0;i<count-1;i++)
+            size[i] = (opus_int16)last_size;
+      }
+      break;
+   }
+   /* Self-delimited framing has an extra size for the last frame. */
+   if (self_delimited)
+   {
+      bytes = parse_size(data, len, size+count-1);
+      len -= bytes;
+      if (size[count-1]<0 || size[count-1] > len)
+         return OPUS_INVALID_PACKET;
+      data += bytes;
+      /* For CBR packets, apply the size to all the frames. */
+      if (cbr)
+      {
+         if (size[count-1]*count > len)
+            return OPUS_INVALID_PACKET;
+         for (i=0;i<count-1;i++)
+            size[i] = size[count-1];
+      } else if (bytes+size[count-1] > last_size)
+         return OPUS_INVALID_PACKET;
+   } else
+   {
+      /* Because it's not encoded explicitly, it's possible the size of the
+         last packet (or all the packets, for the CBR case) is larger than
+         1275. Reject them here.*/
+      if (last_size > 1275)
+         return OPUS_INVALID_PACKET;
+      size[count-1] = (opus_int16)last_size;
+   }
+
+   if (payload_offset)
+      *payload_offset = (int)(data-data0);
+
+   for (i=0;i<count;i++)
+   {
+      if (frames)
+         frames[i] = data;
+      data += size[i];
+   }
+
+   if (packet_offset)
+      *packet_offset = pad+(opus_int32)(data-data0);
+
+   if (out_toc)
+      *out_toc = toc;
+
+   return count;
+}
+
+int opus_packet_parse(const unsigned char *data, opus_int32 len,
+      unsigned char *out_toc, const unsigned char *frames[48],
+      opus_int16 size[48], int *payload_offset)
+{
+   return opus_packet_parse_impl(data, len, 0, out_toc,
+                                 frames, size, payload_offset, NULL);
+}
+
diff --git a/drivers/opus/opus.h b/drivers/opus/opus.h
new file mode 100644
index 0000000000..93a53a2ffc
--- /dev/null
+++ b/drivers/opus/opus.h
@@ -0,0 +1,978 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus.h
+ * @brief Opus reference implementation API
+ */
+
+#ifndef OPUS_H
+#define OPUS_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @mainpage Opus
+ *
+ * The Opus codec is designed for interactive speech and audio transmission over the Internet.
+ * It is designed by the IETF Codec Working Group and incorporates technology from
+ * Skype's SILK codec and Xiph.Org's CELT codec.
+ *
+ * The Opus codec is designed to handle a wide range of interactive audio applications,
+ * including Voice over IP, videoconferencing, in-game chat, and even remote live music
+ * performances. It can scale from low bit-rate narrowband speech to very high quality
+ * stereo music. Its main features are:
+
+ * @li Sampling rates from 8 to 48 kHz
+ * @li Bit-rates from 6 kb/s to 510 kb/s
+ * @li Support for both constant bit-rate (CBR) and variable bit-rate (VBR)
+ * @li Audio bandwidth from narrowband to full-band
+ * @li Support for speech and music
+ * @li Support for mono and stereo
+ * @li Support for multichannel (up to 255 channels)
+ * @li Frame sizes from 2.5 ms to 60 ms
+ * @li Good loss robustness and packet loss concealment (PLC)
+ * @li Floating point and fixed-point implementation
+ *
+ * Documentation sections:
+ * @li @ref opus_encoder
+ * @li @ref opus_decoder
+ * @li @ref opus_repacketizer
+ * @li @ref opus_multistream
+ * @li @ref opus_libinfo
+ * @li @ref opus_custom
+ */
+
+/** @defgroup opus_encoder Opus Encoder
+  * @{
+  *
+  * @brief This page describes the process and functions used to encode Opus.
+  *
+  * Since Opus is a stateful codec, the encoding process starts with creating an encoder
+  * state. This can be done with:
+  *
+  * @code
+  * int          error;
+  * OpusEncoder *enc;
+  * enc = opus_encoder_create(Fs, channels, application, &error);
+  * @endcode
+  *
+  * From this point, @c enc can be used for encoding an audio stream. An encoder state
+  * @b must @b not be used for more than one stream at the same time. Similarly, the encoder
+  * state @b must @b not be re-initialized for each frame.
+  *
+  * While opus_encoder_create() allocates memory for the state, it's also possible
+  * to initialize pre-allocated memory:
+  *
+  * @code
+  * int          size;
+  * int          error;
+  * OpusEncoder *enc;
+  * size = opus_encoder_get_size(channels);
+  * enc = malloc(size);
+  * error = opus_encoder_init(enc, Fs, channels, application);
+  * @endcode
+  *
+  * where opus_encoder_get_size() returns the required size for the encoder state. Note that
+  * future versions of this code may change the size, so no assuptions should be made about it.
+  *
+  * The encoder state is always continuous in memory and only a shallow copy is sufficient
+  * to copy it (e.g. memcpy())
+  *
+  * It is possible to change some of the encoder's settings using the opus_encoder_ctl()
+  * interface. All these settings already default to the recommended value, so they should
+  * only be changed when necessary. The most common settings one may want to change are:
+  *
+  * @code
+  * opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate));
+  * opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
+  * opus_encoder_ctl(enc, OPUS_SET_SIGNAL(signal_type));
+  * @endcode
+  *
+  * where
+  *
+  * @arg bitrate is in bits per second (b/s)
+  * @arg complexity is a value from 1 to 10, where 1 is the lowest complexity and 10 is the highest
+  * @arg signal_type is either OPUS_AUTO (default), OPUS_SIGNAL_VOICE, or OPUS_SIGNAL_MUSIC
+  *
+  * See @ref opus_encoderctls and @ref opus_genericctls for a complete list of parameters that can be set or queried. Most parameters can be set or changed at any time during a stream.
+  *
+  * To encode a frame, opus_encode() or opus_encode_float() must be called with exactly one frame (2.5, 5, 10, 20, 40 or 60 ms) of audio data:
+  * @code
+  * len = opus_encode(enc, audio_frame, frame_size, packet, max_packet);
+  * @endcode
+  *
+  * where
+  * <ul>
+  * <li>audio_frame is the audio data in opus_int16 (or float for opus_encode_float())</li>
+  * <li>frame_size is the duration of the frame in samples (per channel)</li>
+  * <li>packet is the byte array to which the compressed data is written</li>
+  * <li>max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended).
+  *     Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.</li>
+  * </ul>
+  *
+  * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet.
+  * The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value
+  * is 1 byte, then the packet does not need to be transmitted (DTX).
+  *
+  * Once the encoder state if no longer needed, it can be destroyed with
+  *
+  * @code
+  * opus_encoder_destroy(enc);
+  * @endcode
+  *
+  * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(),
+  * then no action is required aside from potentially freeing the memory that was manually
+  * allocated for it (calling free(enc) for the example above)
+  *
+  */
+
+/** Opus encoder state.
+  * This contains the complete state of an Opus encoder.
+  * It is position independent and can be freely copied.
+  * @see opus_encoder_create,opus_encoder_init
+  */
+typedef struct OpusEncoder OpusEncoder;
+
+/** Gets the size of an <code>OpusEncoder</code> structure.
+  * @param[in] channels <tt>int</tt>: Number of channels.
+  *                                   This must be 1 or 2.
+  * @returns The size in bytes.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_encoder_get_size(int channels);
+
+/**
+ */
+
+/** Allocates and initializes an encoder state.
+ * There are three coding modes:
+ *
+ * @ref OPUS_APPLICATION_VOIP gives best quality at a given bitrate for voice
+ *    signals. It enhances the  input signal by high-pass filtering and
+ *    emphasizing formants and harmonics. Optionally  it includes in-band
+ *    forward error correction to protect against packet loss. Use this
+ *    mode for typical VoIP applications. Because of the enhancement,
+ *    even at high bitrates the output may sound different from the input.
+ *
+ * @ref OPUS_APPLICATION_AUDIO gives best quality at a given bitrate for most
+ *    non-voice signals like music. Use this mode for music and mixed
+ *    (music/voice) content, broadcast, and applications requiring less
+ *    than 15 ms of coding delay.
+ *
+ * @ref OPUS_APPLICATION_RESTRICTED_LOWDELAY configures low-delay mode that
+ *    disables the speech-optimized mode in exchange for slightly reduced delay.
+ *    This mode can only be set on an newly initialized or freshly reset encoder
+ *    because it changes the codec delay.
+ *
+ * This is useful when the caller knows that the speech-optimized modes will not be needed (use with caution).
+ * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz)
+ *                                     This must be one of 8000, 12000, 16000,
+ *                                     24000, or 48000.
+ * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal
+ * @param [in] application <tt>int</tt>: Coding mode (@ref OPUS_APPLICATION_VOIP/@ref OPUS_APPLICATION_AUDIO/@ref OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ * @param [out] error <tt>int*</tt>: @ref opus_errorcodes
+ * @note Regardless of the sampling rate and number channels selected, the Opus encoder
+ * can switch to a lower audio bandwidth or number of channels if the bitrate
+ * selected is too low. This also means that it is safe to always use 48 kHz stereo input
+ * and let the encoder optimize the encoding.
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create(
+    opus_int32 Fs,
+    int channels,
+    int application,
+    int *error
+);
+
+/** Initializes a previously allocated encoder state
+  * The memory pointed to by st must be at least the size returned by opus_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see opus_encoder_create(),opus_encoder_get_size()
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz)
+ *                                      This must be one of 8000, 12000, 16000,
+ *                                      24000, or 48000.
+  * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal
+  * @param [in] application <tt>int</tt>: Coding mode (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO/OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+  * @retval #OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_EXPORT int opus_encoder_init(
+    OpusEncoder *st,
+    opus_int32 Fs,
+    int channels,
+    int application
+) OPUS_ARG_NONNULL(1);
+
+/** Encodes an Opus frame.
+  * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>opus_int16*</tt>: Input signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16)
+  * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+  *                                      input signal.
+  *                                      This must be an Opus frame size for
+  *                                      the encoder's sampling rate.
+  *                                      For example, at 48 kHz the permitted
+  *                                      values are 120, 240, 480, 960, 1920,
+  *                                      and 2880.
+  *                                      Passing in a duration of less than
+  *                                      10 ms (480 samples at 48 kHz) will
+  *                                      prevent the encoder from using the LPC
+  *                                      or hybrid modes.
+  * @param [out] data <tt>unsigned char*</tt>: Output payload.
+  *                                            This must contain storage for at
+  *                                            least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode(
+    OpusEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes an Opus frame from floating point input.
+  * @param [in] st <tt>OpusEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0.
+  *          Samples with a range beyond +/-1.0 are supported but will
+  *          be clipped by decoders using the integer API and should
+  *          only be used if it is known that the far end supports
+  *          extended dynamic range.
+  *          length is frame_size*channels*sizeof(float)
+  * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
+  *                                      input signal.
+  *                                      This must be an Opus frame size for
+  *                                      the encoder's sampling rate.
+  *                                      For example, at 48 kHz the permitted
+  *                                      values are 120, 240, 480, 960, 1920,
+  *                                      and 2880.
+  *                                      Passing in a duration of less than
+  *                                      10 ms (480 samples at 48 kHz) will
+  *                                      prevent the encoder from using the LPC
+  *                                      or hybrid modes.
+  * @param [out] data <tt>unsigned char*</tt>: Output payload.
+  *                                            This must contain storage for at
+  *                                            least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode_float(
+    OpusEncoder *st,
+    const float *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Frees an <code>OpusEncoder</code> allocated by opus_encoder_create().
+  * @param[in] st <tt>OpusEncoder*</tt>: State to be freed.
+  */
+OPUS_EXPORT void opus_encoder_destroy(OpusEncoder *st);
+
+/** Perform a CTL function on an Opus encoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @param st <tt>OpusEncoder*</tt>: Encoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls or
+  *                @ref opus_encoderctls.
+  * @see opus_genericctls
+  * @see opus_encoderctls
+  */
+OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+/**@}*/
+
+/** @defgroup opus_decoder Opus Decoder
+  * @{
+  *
+  * @brief This page describes the process and functions used to decode Opus.
+  *
+  * The decoding process also starts with creating a decoder
+  * state. This can be done with:
+  * @code
+  * int          error;
+  * OpusDecoder *dec;
+  * dec = opus_decoder_create(Fs, channels, &error);
+  * @endcode
+  * where
+  * @li Fs is the sampling rate and must be 8000, 12000, 16000, 24000, or 48000
+  * @li channels is the number of channels (1 or 2)
+  * @li error will hold the error code in case of failure (or #OPUS_OK on success)
+  * @li the return value is a newly created decoder state to be used for decoding
+  *
+  * While opus_decoder_create() allocates memory for the state, it's also possible
+  * to initialize pre-allocated memory:
+  * @code
+  * int          size;
+  * int          error;
+  * OpusDecoder *dec;
+  * size = opus_decoder_get_size(channels);
+  * dec = malloc(size);
+  * error = opus_decoder_init(dec, Fs, channels);
+  * @endcode
+  * where opus_decoder_get_size() returns the required size for the decoder state. Note that
+  * future versions of this code may change the size, so no assuptions should be made about it.
+  *
+  * The decoder state is always continuous in memory and only a shallow copy is sufficient
+  * to copy it (e.g. memcpy())
+  *
+  * To decode a frame, opus_decode() or opus_decode_float() must be called with a packet of compressed audio data:
+  * @code
+  * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0);
+  * @endcode
+  * where
+  *
+  * @li packet is the byte array containing the compressed data
+  * @li len is the exact number of bytes contained in the packet
+  * @li decoded is the decoded audio data in opus_int16 (or float for opus_decode_float())
+  * @li max_size is the max duration of the frame in samples (per channel) that can fit into the decoded_frame array
+  *
+  * opus_decode() and opus_decode_float() return the number of samples (per channel) decoded from the packet.
+  * If that value is negative, then an error has occurred. This can occur if the packet is corrupted or if the audio
+  * buffer is too small to hold the decoded audio.
+  *
+  * Opus is a stateful codec with overlapping blocks and as a result Opus
+  * packets are not coded independently of each other. Packets must be
+  * passed into the decoder serially and in the correct order for a correct
+  * decode. Lost packets can be replaced with loss concealment by calling
+  * the decoder with a null pointer and zero length for the missing packet.
+  *
+  * A single codec state may only be accessed from a single thread at
+  * a time and any required locking must be performed by the caller. Separate
+  * streams must be decoded with separate decoder states and can be decoded
+  * in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK
+  * defined.
+  *
+  */
+
+/** Opus decoder state.
+  * This contains the complete state of an Opus decoder.
+  * It is position independent and can be freely copied.
+  * @see opus_decoder_create,opus_decoder_init
+  */
+typedef struct OpusDecoder OpusDecoder;
+
+/** Gets the size of an <code>OpusDecoder</code> structure.
+  * @param [in] channels <tt>int</tt>: Number of channels.
+  *                                    This must be 1 or 2.
+  * @returns The size in bytes.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_size(int channels);
+
+/** Allocates and initializes a decoder state.
+  * @param [in] Fs <tt>opus_int32</tt>: Sample rate to decode at (Hz).
+  *                                     This must be one of 8000, 12000, 16000,
+  *                                     24000, or 48000.
+  * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode
+  * @param [out] error <tt>int*</tt>: #OPUS_OK Success or @ref opus_errorcodes
+  *
+  * Internally Opus stores data at 48000 Hz, so that should be the default
+  * value for Fs. However, the decoder can efficiently decode to buffers
+  * at 8, 12, 16, and 24 kHz so if for some reason the caller cannot use
+  * data at the full sample rate, or knows the compressed data doesn't
+  * use the full frequency range, it can request decoding at a reduced
+  * rate. Likewise, the decoder is capable of filling in either mono or
+  * interleaved stereo pcm buffers, at the caller's request.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusDecoder *opus_decoder_create(
+    opus_int32 Fs,
+    int channels,
+    int *error
+);
+
+/** Initializes a previously allocated decoder state.
+  * The state must be at least the size returned by opus_decoder_get_size().
+  * This is intended for applications which use their own allocator instead of malloc. @see opus_decoder_create,opus_decoder_get_size
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state.
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate to decode to (Hz).
+  *                                     This must be one of 8000, 12000, 16000,
+  *                                     24000, or 48000.
+  * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode
+  * @retval #OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_EXPORT int opus_decoder_init(
+    OpusDecoder *st,
+    opus_int32 Fs,
+    int channels
+) OPUS_ARG_NONNULL(1);
+
+/** Decode an Opus packet.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload*
+  * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(opus_int16)
+  * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+  *  If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+  *  not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+  *  then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+  *  decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+  *  FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+  * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+  *  decoded. If no such data is available, the frame is decoded as if it were lost.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode(
+    OpusDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    opus_int16 *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode an Opus packet with floating point output.
+  * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
+  * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(float)
+  * @param [in] frame_size Number of samples per channel of available space in \a pcm.
+  *  If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
+  *  not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
+  *  then frame_size needs to be exactly the duration of audio that is missing, otherwise the
+  *  decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
+  *  FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
+  * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
+  *  decoded. If no such data is available the frame is decoded as if it were lost.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode_float(
+    OpusDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    float *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus decoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @param st <tt>OpusDecoder*</tt>: Decoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls or
+  *                @ref opus_decoderctls.
+  * @see opus_genericctls
+  * @see opus_decoderctls
+  */
+OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/** Frees an <code>OpusDecoder</code> allocated by opus_decoder_create().
+  * @param[in] st <tt>OpusDecoder*</tt>: State to be freed.
+  */
+OPUS_EXPORT void opus_decoder_destroy(OpusDecoder *st);
+
+/** Parse an opus packet into one or more frames.
+  * Opus_decode will perform this operation internally so most applications do
+  * not need to use this function.
+  * This function does not copy the frames, the returned pointers are pointers into
+  * the input packet.
+  * @param [in] data <tt>char*</tt>: Opus packet to be parsed
+  * @param [in] len <tt>opus_int32</tt>: size of data
+  * @param [out] out_toc <tt>char*</tt>: TOC pointer
+  * @param [out] frames <tt>char*[48]</tt> encapsulated frames
+  * @param [out] size <tt>opus_int16[48]</tt> sizes of the encapsulated frames
+  * @param [out] payload_offset <tt>int*</tt>: returns the position of the payload within the packet (in bytes)
+  * @returns number of frames
+  */
+OPUS_EXPORT int opus_packet_parse(
+   const unsigned char *data,
+   opus_int32 len,
+   unsigned char *out_toc,
+   const unsigned char *frames[48],
+   opus_int16 size[48],
+   int *payload_offset
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Gets the bandwidth of an Opus packet.
+  * @param [in] data <tt>char*</tt>: Opus packet
+  * @retval OPUS_BANDWIDTH_NARROWBAND Narrowband (4kHz bandpass)
+  * @retval OPUS_BANDWIDTH_MEDIUMBAND Mediumband (6kHz bandpass)
+  * @retval OPUS_BANDWIDTH_WIDEBAND Wideband (8kHz bandpass)
+  * @retval OPUS_BANDWIDTH_SUPERWIDEBAND Superwideband (12kHz bandpass)
+  * @retval OPUS_BANDWIDTH_FULLBAND Fullband (20kHz bandpass)
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_bandwidth(const unsigned char *data) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples per frame from an Opus packet.
+  * @param [in] data <tt>char*</tt>: Opus packet.
+  *                                  This must contain at least one byte of
+  *                                  data.
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz.
+  *                                     This must be a multiple of 400, or
+  *                                     inaccurate results will be returned.
+  * @returns Number of samples per frame.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_samples_per_frame(const unsigned char *data, opus_int32 Fs) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of channels from an Opus packet.
+  * @param [in] data <tt>char*</tt>: Opus packet
+  * @returns Number of channels
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_channels(const unsigned char *data) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of frames in an Opus packet.
+  * @param [in] packet <tt>char*</tt>: Opus packet
+  * @param [in] len <tt>opus_int32</tt>: Length of packet
+  * @returns Number of frames
+  * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples of an Opus packet.
+  * @param [in] packet <tt>char*</tt>: Opus packet
+  * @param [in] len <tt>opus_int32</tt>: Length of packet
+  * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz.
+  *                                     This must be a multiple of 400, or
+  *                                     inaccurate results will be returned.
+  * @returns Number of samples
+  * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, opus_int32 Fs) OPUS_ARG_NONNULL(1);
+
+/** Gets the number of samples of an Opus packet.
+  * @param [in] dec <tt>OpusDecoder*</tt>: Decoder state
+  * @param [in] packet <tt>char*</tt>: Opus packet
+  * @param [in] len <tt>opus_int32</tt>: Length of packet
+  * @returns Number of samples
+  * @retval OPUS_BAD_ARG Insufficient data was passed to the function
+  * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+/** Applies soft-clipping to bring a float signal within the [-1,1] range. If
+  * the signal is already in that range, nothing is done. If there are values
+  * outside of [-1,1], then the signal is clipped as smoothly as possible to
+  * both fit in the range and avoid creating excessive distortion in the
+  * process.
+  * @param [in,out] pcm <tt>float*</tt>: Input PCM and modified PCM
+  * @param [in] frame_size <tt>int</tt> Number of samples per channel to process
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @param [in,out] softclip_mem <tt>float*</tt>: State memory for the soft clipping process (one float per channel, initialized to zero)
+  */
+OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, float *softclip_mem);
+
+
+/**@}*/
+
+/** @defgroup opus_repacketizer Repacketizer
+  * @{
+  *
+  * The repacketizer can be used to merge multiple Opus packets into a single
+  * packet or alternatively to split Opus packets that have previously been
+  * merged. Splitting valid Opus packets is always guaranteed to succeed,
+  * whereas merging valid packets only succeeds if all frames have the same
+  * mode, bandwidth, and frame size, and when the total duration of the merged
+  * packet is no more than 120 ms.
+  * The repacketizer currently only operates on elementary Opus
+  * streams. It will not manipualte multistream packets successfully, except in
+  * the degenerate case where they consist of data from a single stream.
+  *
+  * The repacketizing process starts with creating a repacketizer state, either
+  * by calling opus_repacketizer_create() or by allocating the memory yourself,
+  * e.g.,
+  * @code
+  * OpusRepacketizer *rp;
+  * rp = (OpusRepacketizer*)malloc(opus_repacketizer_get_size());
+  * if (rp != NULL)
+  *     opus_repacketizer_init(rp);
+  * @endcode
+  *
+  * Then the application should submit packets with opus_repacketizer_cat(),
+  * extract new packets with opus_repacketizer_out() or
+  * opus_repacketizer_out_range(), and then reset the state for the next set of
+  * input packets via opus_repacketizer_init().
+  *
+  * For example, to split a sequence of packets into individual frames:
+  * @code
+  * unsigned char *data;
+  * int len;
+  * while (get_next_packet(&data, &len))
+  * {
+  *   unsigned char out[1276];
+  *   opus_int32 out_len;
+  *   int nb_frames;
+  *   int err;
+  *   int i;
+  *   err = opus_repacketizer_cat(rp, data, len);
+  *   if (err != OPUS_OK)
+  *   {
+  *     release_packet(data);
+  *     return err;
+  *   }
+  *   nb_frames = opus_repacketizer_get_nb_frames(rp);
+  *   for (i = 0; i < nb_frames; i++)
+  *   {
+  *     out_len = opus_repacketizer_out_range(rp, i, i+1, out, sizeof(out));
+  *     if (out_len < 0)
+  *     {
+  *        release_packet(data);
+  *        return (int)out_len;
+  *     }
+  *     output_next_packet(out, out_len);
+  *   }
+  *   opus_repacketizer_init(rp);
+  *   release_packet(data);
+  * }
+  * @endcode
+  *
+  * Alternatively, to combine a sequence of frames into packets that each
+  * contain up to <code>TARGET_DURATION_MS</code> milliseconds of data:
+  * @code
+  * // The maximum number of packets with duration TARGET_DURATION_MS occurs
+  * // when the frame size is 2.5 ms, for a total of (TARGET_DURATION_MS*2/5)
+  * // packets.
+  * unsigned char *data[(TARGET_DURATION_MS*2/5)+1];
+  * opus_int32 len[(TARGET_DURATION_MS*2/5)+1];
+  * int nb_packets;
+  * unsigned char out[1277*(TARGET_DURATION_MS*2/2)];
+  * opus_int32 out_len;
+  * int prev_toc;
+  * nb_packets = 0;
+  * while (get_next_packet(data+nb_packets, len+nb_packets))
+  * {
+  *   int nb_frames;
+  *   int err;
+  *   nb_frames = opus_packet_get_nb_frames(data[nb_packets], len[nb_packets]);
+  *   if (nb_frames < 1)
+  *   {
+  *     release_packets(data, nb_packets+1);
+  *     return nb_frames;
+  *   }
+  *   nb_frames += opus_repacketizer_get_nb_frames(rp);
+  *   // If adding the next packet would exceed our target, or it has an
+  *   // incompatible TOC sequence, output the packets we already have before
+  *   // submitting it.
+  *   // N.B., The nb_packets > 0 check ensures we've submitted at least one
+  *   // packet since the last call to opus_repacketizer_init(). Otherwise a
+  *   // single packet longer than TARGET_DURATION_MS would cause us to try to
+  *   // output an (invalid) empty packet. It also ensures that prev_toc has
+  *   // been set to a valid value. Additionally, len[nb_packets] > 0 is
+  *   // guaranteed by the call to opus_packet_get_nb_frames() above, so the
+  *   // reference to data[nb_packets][0] should be valid.
+  *   if (nb_packets > 0 && (
+  *       ((prev_toc & 0xFC) != (data[nb_packets][0] & 0xFC)) ||
+  *       opus_packet_get_samples_per_frame(data[nb_packets], 48000)*nb_frames >
+  *       TARGET_DURATION_MS*48))
+  *   {
+  *     out_len = opus_repacketizer_out(rp, out, sizeof(out));
+  *     if (out_len < 0)
+  *     {
+  *        release_packets(data, nb_packets+1);
+  *        return (int)out_len;
+  *     }
+  *     output_next_packet(out, out_len);
+  *     opus_repacketizer_init(rp);
+  *     release_packets(data, nb_packets);
+  *     data[0] = data[nb_packets];
+  *     len[0] = len[nb_packets];
+  *     nb_packets = 0;
+  *   }
+  *   err = opus_repacketizer_cat(rp, data[nb_packets], len[nb_packets]);
+  *   if (err != OPUS_OK)
+  *   {
+  *     release_packets(data, nb_packets+1);
+  *     return err;
+  *   }
+  *   prev_toc = data[nb_packets][0];
+  *   nb_packets++;
+  * }
+  * // Output the final, partial packet.
+  * if (nb_packets > 0)
+  * {
+  *   out_len = opus_repacketizer_out(rp, out, sizeof(out));
+  *   release_packets(data, nb_packets);
+  *   if (out_len < 0)
+  *     return (int)out_len;
+  *   output_next_packet(out, out_len);
+  * }
+  * @endcode
+  *
+  * An alternate way of merging packets is to simply call opus_repacketizer_cat()
+  * unconditionally until it fails. At that point, the merged packet can be
+  * obtained with opus_repacketizer_out() and the input packet for which
+  * opus_repacketizer_cat() needs to be re-added to a newly reinitialized
+  * repacketizer state.
+  */
+
+typedef struct OpusRepacketizer OpusRepacketizer;
+
+/** Gets the size of an <code>OpusRepacketizer</code> structure.
+  * @returns The size in bytes.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_size(void);
+
+/** (Re)initializes a previously allocated repacketizer state.
+  * The state must be at least the size returned by opus_repacketizer_get_size().
+  * This can be used for applications which use their own allocator instead of
+  * malloc().
+  * It must also be called to reset the queue of packets waiting to be
+  * repacketized, which is necessary if the maximum packet duration of 120 ms
+  * is reached or if you wish to submit packets with a different Opus
+  * configuration (coding mode, audio bandwidth, frame size, or channel count).
+  * Failure to do so will prevent a new packet from being added with
+  * opus_repacketizer_cat().
+  * @see opus_repacketizer_create
+  * @see opus_repacketizer_get_size
+  * @see opus_repacketizer_cat
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to
+  *                                       (re)initialize.
+  * @returns A pointer to the same repacketizer state that was passed in.
+  */
+OPUS_EXPORT OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1);
+
+/** Allocates memory and initializes the new repacketizer with
+ * opus_repacketizer_init().
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusRepacketizer *opus_repacketizer_create(void);
+
+/** Frees an <code>OpusRepacketizer</code> allocated by
+  * opus_repacketizer_create().
+  * @param[in] rp <tt>OpusRepacketizer*</tt>: State to be freed.
+  */
+OPUS_EXPORT void opus_repacketizer_destroy(OpusRepacketizer *rp);
+
+/** Add a packet to the current repacketizer state.
+  * This packet must match the configuration of any packets already submitted
+  * for repacketization since the last call to opus_repacketizer_init().
+  * This means that it must have the same coding mode, audio bandwidth, frame
+  * size, and channel count.
+  * This can be checked in advance by examining the top 6 bits of the first
+  * byte of the packet, and ensuring they match the top 6 bits of the first
+  * byte of any previously submitted packet.
+  * The total duration of audio in the repacketizer state also must not exceed
+  * 120 ms, the maximum duration of a single packet, after adding this packet.
+  *
+  * The contents of the current repacketizer state can be extracted into new
+  * packets using opus_repacketizer_out() or opus_repacketizer_out_range().
+  *
+  * In order to add a packet with a different configuration or to add more
+  * audio beyond 120 ms, you must clear the repacketizer state by calling
+  * opus_repacketizer_init().
+  * If a packet is too large to add to the current repacketizer state, no part
+  * of it is added, even if it contains multiple frames, some of which might
+  * fit.
+  * If you wish to be able to add parts of such packets, you should first use
+  * another repacketizer to split the packet into pieces and add them
+  * individually.
+  * @see opus_repacketizer_out_range
+  * @see opus_repacketizer_out
+  * @see opus_repacketizer_init
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to which to
+  *                                       add the packet.
+  * @param[in] data <tt>const unsigned char*</tt>: The packet data.
+  *                                                The application must ensure
+  *                                                this pointer remains valid
+  *                                                until the next call to
+  *                                                opus_repacketizer_init() or
+  *                                                opus_repacketizer_destroy().
+  * @param len <tt>opus_int32</tt>: The number of bytes in the packet data.
+  * @returns An error code indicating whether or not the operation succeeded.
+  * @retval #OPUS_OK The packet's contents have been added to the repacketizer
+  *                  state.
+  * @retval #OPUS_INVALID_PACKET The packet did not have a valid TOC sequence,
+  *                              the packet's TOC sequence was not compatible
+  *                              with previously submitted packets (because
+  *                              the coding mode, audio bandwidth, frame size,
+  *                              or channel count did not match), or adding
+  *                              this packet would increase the total amount of
+  *                              audio stored in the repacketizer state to more
+  *                              than 120 ms.
+  */
+OPUS_EXPORT int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+
+/** Construct a new packet from data previously submitted to the repacketizer
+  * state via opus_repacketizer_cat().
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to
+  *                                       construct the new packet.
+  * @param begin <tt>int</tt>: The index of the first frame in the current
+  *                            repacketizer state to include in the output.
+  * @param end <tt>int</tt>: One past the index of the last frame in the
+  *                          current repacketizer state to include in the
+  *                          output.
+  * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to
+  *                                                 store the output packet.
+  * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in
+  *                                    the output buffer. In order to guarantee
+  *                                    success, this should be at least
+  *                                    <code>1276</code> for a single frame,
+  *                                    or for multiple frames,
+  *                                    <code>1277*(end-begin)</code>.
+  *                                    However, <code>1*(end-begin)</code> plus
+  *                                    the size of all packet data submitted to
+  *                                    the repacketizer since the last call to
+  *                                    opus_repacketizer_init() or
+  *                                    opus_repacketizer_create() is also
+  *                                    sufficient, and possibly much smaller.
+  * @returns The total size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG <code>[begin,end)</code> was an invalid range of
+  *                       frames (begin < 0, begin >= end, or end >
+  *                       opus_repacketizer_get_nb_frames()).
+  * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the
+  *                                complete output packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Return the total number of frames contained in packet data submitted to
+  * the repacketizer state so far via opus_repacketizer_cat() since the last
+  * call to opus_repacketizer_init() or opus_repacketizer_create().
+  * This defines the valid range of packets that can be extracted with
+  * opus_repacketizer_out_range() or opus_repacketizer_out().
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state containing the
+  *                                       frames.
+  * @returns The total number of frames contained in the packet data submitted
+  *          to the repacketizer state.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1);
+
+/** Construct a new packet from data previously submitted to the repacketizer
+  * state via opus_repacketizer_cat().
+  * This is a convenience routine that returns all the data submitted so far
+  * in a single packet.
+  * It is equivalent to calling
+  * @code
+  * opus_repacketizer_out_range(rp, 0, opus_repacketizer_get_nb_frames(rp),
+  *                             data, maxlen)
+  * @endcode
+  * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to
+  *                                       construct the new packet.
+  * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to
+  *                                                 store the output packet.
+  * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in
+  *                                    the output buffer. In order to guarantee
+  *                                    success, this should be at least
+  *                                    <code>1277*opus_repacketizer_get_nb_frames(rp)</code>.
+  *                                    However,
+  *                                    <code>1*opus_repacketizer_get_nb_frames(rp)</code>
+  *                                    plus the size of all packet data
+  *                                    submitted to the repacketizer since the
+  *                                    last call to opus_repacketizer_init() or
+  *                                    opus_repacketizer_create() is also
+  *                                    sufficient, and possibly much smaller.
+  * @returns The total size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the
+  *                                complete output packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1);
+
+/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least as large as len.
+  * @returns an error code
+  * @retval #OPUS_OK \a on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+/** Remove all padding from a given Opus packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len);
+
+/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence).
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to pad.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding.
+  *                                 This must be at least 1.
+  * @param nb_streams <tt>opus_int32</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least as large as len.
+  * @returns an error code
+  * @retval #OPUS_OK \a on success.
+  * @retval #OPUS_BAD_ARG \a len was less than 1.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams);
+
+/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to
+  * minimize space usage.
+  * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the
+  *                                                   packet to strip.
+  * @param len <tt>opus_int32</tt>: The size of the packet.
+  *                                 This must be at least 1.
+  * @param nb_streams <tt>opus_int32</tt>: The number of streams (not channels) in the packet.
+  *                                 This must be at least 1.
+  * @returns The new size of the output packet on success, or an error code
+  *          on failure.
+  * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len.
+  * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_H */
diff --git a/drivers/opus/opus_compare.c b/drivers/opus/opus_compare.c
new file mode 100644
index 0000000000..06c67d752f
--- /dev/null
+++ b/drivers/opus/opus_compare.c
@@ -0,0 +1,379 @@
+/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation
+   Written by Jean-Marc Valin and Timothy B. Terriberry */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define OPUS_PI (3.14159265F)
+
+#define OPUS_COSF(_x)        ((float)cos(_x))
+#define OPUS_SINF(_x)        ((float)sin(_x))
+
+static void *check_alloc(void *_ptr){
+  if(_ptr==NULL){
+    fprintf(stderr,"Out of memory.\n");
+    exit(EXIT_FAILURE);
+  }
+  return _ptr;
+}
+
+static void *opus_malloc(size_t _size){
+  return check_alloc(malloc(_size));
+}
+
+static void *opus_realloc(void *_ptr,size_t _size){
+  return check_alloc(realloc(_ptr,_size));
+}
+
+static size_t read_pcm16(float **_samples,FILE *_fin,int _nchannels){
+  unsigned char  buf[1024];
+  float         *samples;
+  size_t         nsamples;
+  size_t         csamples;
+  size_t         xi;
+  size_t         nread;
+  samples=NULL;
+  nsamples=csamples=0;
+  for(;;){
+    nread=fread(buf,2*_nchannels,1024/(2*_nchannels),_fin);
+    if(nread<=0)break;
+    if(nsamples+nread>csamples){
+      do csamples=csamples<<1|1;
+      while(nsamples+nread>csamples);
+      samples=(float *)opus_realloc(samples,
+       _nchannels*csamples*sizeof(*samples));
+    }
+    for(xi=0;xi<nread;xi++){
+      int ci;
+      for(ci=0;ci<_nchannels;ci++){
+        int s;
+        s=buf[2*(xi*_nchannels+ci)+1]<<8|buf[2*(xi*_nchannels+ci)];
+        s=((s&0xFFFF)^0x8000)-0x8000;
+        samples[(nsamples+xi)*_nchannels+ci]=s;
+      }
+    }
+    nsamples+=nread;
+  }
+  *_samples=(float *)opus_realloc(samples,
+   _nchannels*nsamples*sizeof(*samples));
+  return nsamples;
+}
+
+static void band_energy(float *_out,float *_ps,const int *_bands,int _nbands,
+ const float *_in,int _nchannels,size_t _nframes,int _window_sz,
+ int _step,int _downsample){
+  float *window;
+  float *x;
+  float *c;
+  float *s;
+  size_t xi;
+  int    xj;
+  int    ps_sz;
+  window=(float *)opus_malloc((3+_nchannels)*_window_sz*sizeof(*window));
+  c=window+_window_sz;
+  s=c+_window_sz;
+  x=s+_window_sz;
+  ps_sz=_window_sz/2;
+  for(xj=0;xj<_window_sz;xj++){
+    window[xj]=0.5F-0.5F*OPUS_COSF((2*OPUS_PI/(_window_sz-1))*xj);
+  }
+  for(xj=0;xj<_window_sz;xj++){
+    c[xj]=OPUS_COSF((2*OPUS_PI/_window_sz)*xj);
+  }
+  for(xj=0;xj<_window_sz;xj++){
+    s[xj]=OPUS_SINF((2*OPUS_PI/_window_sz)*xj);
+  }
+  for(xi=0;xi<_nframes;xi++){
+    int ci;
+    int xk;
+    int bi;
+    for(ci=0;ci<_nchannels;ci++){
+      for(xk=0;xk<_window_sz;xk++){
+        x[ci*_window_sz+xk]=window[xk]*_in[(xi*_step+xk)*_nchannels+ci];
+      }
+    }
+    for(bi=xj=0;bi<_nbands;bi++){
+      float p[2]={0};
+      for(;xj<_bands[bi+1];xj++){
+        for(ci=0;ci<_nchannels;ci++){
+          float re;
+          float im;
+          int   ti;
+          ti=0;
+          re=im=0;
+          for(xk=0;xk<_window_sz;xk++){
+            re+=c[ti]*x[ci*_window_sz+xk];
+            im-=s[ti]*x[ci*_window_sz+xk];
+            ti+=xj;
+            if(ti>=_window_sz)ti-=_window_sz;
+          }
+          re*=_downsample;
+          im*=_downsample;
+          _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+100000;
+          p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci];
+        }
+      }
+      if(_out){
+        _out[(xi*_nbands+bi)*_nchannels]=p[0]/(_bands[bi+1]-_bands[bi]);
+        if(_nchannels==2){
+          _out[(xi*_nbands+bi)*_nchannels+1]=p[1]/(_bands[bi+1]-_bands[bi]);
+        }
+      }
+    }
+  }
+  free(window);
+}
+
+#define NBANDS (21)
+#define NFREQS (240)
+
+/*Bands on which we compute the pseudo-NMR (Bark-derived
+  CELT bands).*/
+static const int BANDS[NBANDS+1]={
+  0,2,4,6,8,10,12,14,16,20,24,28,32,40,48,56,68,80,96,120,156,200
+};
+
+#define TEST_WIN_SIZE (480)
+#define TEST_WIN_STEP (120)
+
+int main(int _argc,const char **_argv){
+  FILE    *fin1;
+  FILE    *fin2;
+  float   *x;
+  float   *y;
+  float   *xb;
+  float   *X;
+  float   *Y;
+  double    err;
+  float    Q;
+  size_t   xlength;
+  size_t   ylength;
+  size_t   nframes;
+  size_t   xi;
+  int      ci;
+  int      xj;
+  int      bi;
+  int      nchannels;
+  unsigned rate;
+  int      downsample;
+  int      ybands;
+  int      yfreqs;
+  int      max_compare;
+  if(_argc<3||_argc>6){
+    fprintf(stderr,"Usage: %s [-s] [-r rate2] <file1.sw> <file2.sw>\n",
+     _argv[0]);
+    return EXIT_FAILURE;
+  }
+  nchannels=1;
+  if(strcmp(_argv[1],"-s")==0){
+    nchannels=2;
+    _argv++;
+  }
+  rate=48000;
+  ybands=NBANDS;
+  yfreqs=NFREQS;
+  downsample=1;
+  if(strcmp(_argv[1],"-r")==0){
+    rate=atoi(_argv[2]);
+    if(rate!=8000&&rate!=12000&&rate!=16000&&rate!=24000&&rate!=48000){
+      fprintf(stderr,
+       "Sampling rate must be 8000, 12000, 16000, 24000, or 48000\n");
+      return EXIT_FAILURE;
+    }
+    downsample=48000/rate;
+    switch(rate){
+      case  8000:ybands=13;break;
+      case 12000:ybands=15;break;
+      case 16000:ybands=17;break;
+      case 24000:ybands=19;break;
+    }
+    yfreqs=NFREQS/downsample;
+    _argv+=2;
+  }
+  fin1=fopen(_argv[1],"rb");
+  if(fin1==NULL){
+    fprintf(stderr,"Error opening '%s'.\n",_argv[1]);
+    return EXIT_FAILURE;
+  }
+  fin2=fopen(_argv[2],"rb");
+  if(fin2==NULL){
+    fprintf(stderr,"Error opening '%s'.\n",_argv[2]);
+    fclose(fin1);
+    return EXIT_FAILURE;
+  }
+  /*Read in the data and allocate scratch space.*/
+  xlength=read_pcm16(&x,fin1,2);
+  if(nchannels==1){
+    for(xi=0;xi<xlength;xi++)x[xi]=.5*(x[2*xi]+x[2*xi+1]);
+  }
+  fclose(fin1);
+  ylength=read_pcm16(&y,fin2,nchannels);
+  fclose(fin2);
+  if(xlength!=ylength*downsample){
+    fprintf(stderr,"Sample counts do not match (%lu!=%lu).\n",
+     (unsigned long)xlength,(unsigned long)ylength*downsample);
+    return EXIT_FAILURE;
+  }
+  if(xlength<TEST_WIN_SIZE){
+    fprintf(stderr,"Insufficient sample data (%lu<%i).\n",
+     (unsigned long)xlength,TEST_WIN_SIZE);
+    return EXIT_FAILURE;
+  }
+  nframes=(xlength-TEST_WIN_SIZE+TEST_WIN_STEP)/TEST_WIN_STEP;
+  xb=(float *)opus_malloc(nframes*NBANDS*nchannels*sizeof(*xb));
+  X=(float *)opus_malloc(nframes*NFREQS*nchannels*sizeof(*X));
+  Y=(float *)opus_malloc(nframes*yfreqs*nchannels*sizeof(*Y));
+  /*Compute the per-band spectral energy of the original signal
+     and the error.*/
+  band_energy(xb,X,BANDS,NBANDS,x,nchannels,nframes,
+   TEST_WIN_SIZE,TEST_WIN_STEP,1);
+  free(x);
+  band_energy(NULL,Y,BANDS,ybands,y,nchannels,nframes,
+   TEST_WIN_SIZE/downsample,TEST_WIN_STEP/downsample,downsample);
+  free(y);
+  for(xi=0;xi<nframes;xi++){
+    /*Frequency masking (low to high): 10 dB/Bark slope.*/
+    for(bi=1;bi<NBANDS;bi++){
+      for(ci=0;ci<nchannels;ci++){
+        xb[(xi*NBANDS+bi)*nchannels+ci]+=
+         0.1F*xb[(xi*NBANDS+bi-1)*nchannels+ci];
+      }
+    }
+    /*Frequency masking (high to low): 15 dB/Bark slope.*/
+    for(bi=NBANDS-1;bi-->0;){
+      for(ci=0;ci<nchannels;ci++){
+        xb[(xi*NBANDS+bi)*nchannels+ci]+=
+         0.03F*xb[(xi*NBANDS+bi+1)*nchannels+ci];
+      }
+    }
+    if(xi>0){
+      /*Temporal masking: -3 dB/2.5ms slope.*/
+      for(bi=0;bi<NBANDS;bi++){
+        for(ci=0;ci<nchannels;ci++){
+          xb[(xi*NBANDS+bi)*nchannels+ci]+=
+           0.5F*xb[((xi-1)*NBANDS+bi)*nchannels+ci];
+        }
+      }
+    }
+    /* Allowing some cross-talk */
+    if(nchannels==2){
+      for(bi=0;bi<NBANDS;bi++){
+        float l,r;
+        l=xb[(xi*NBANDS+bi)*nchannels+0];
+        r=xb[(xi*NBANDS+bi)*nchannels+1];
+        xb[(xi*NBANDS+bi)*nchannels+0]+=0.01F*r;
+        xb[(xi*NBANDS+bi)*nchannels+1]+=0.01F*l;
+      }
+    }
+
+    /* Apply masking */
+    for(bi=0;bi<ybands;bi++){
+      for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){
+        for(ci=0;ci<nchannels;ci++){
+          X[(xi*NFREQS+xj)*nchannels+ci]+=
+           0.1F*xb[(xi*NBANDS+bi)*nchannels+ci];
+          Y[(xi*yfreqs+xj)*nchannels+ci]+=
+           0.1F*xb[(xi*NBANDS+bi)*nchannels+ci];
+        }
+      }
+    }
+  }
+
+  /* Average of consecutive frames to make comparison slightly less sensitive */
+  for(bi=0;bi<ybands;bi++){
+    for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){
+      for(ci=0;ci<nchannels;ci++){
+         float xtmp;
+         float ytmp;
+         xtmp = X[xj*nchannels+ci];
+         ytmp = Y[xj*nchannels+ci];
+         for(xi=1;xi<nframes;xi++){
+           float xtmp2;
+           float ytmp2;
+           xtmp2 = X[(xi*NFREQS+xj)*nchannels+ci];
+           ytmp2 = Y[(xi*yfreqs+xj)*nchannels+ci];
+           X[(xi*NFREQS+xj)*nchannels+ci] += xtmp;
+           Y[(xi*yfreqs+xj)*nchannels+ci] += ytmp;
+           xtmp = xtmp2;
+           ytmp = ytmp2;
+         }
+      }
+    }
+  }
+
+  /*If working at a lower sampling rate, don't take into account the last
+     300 Hz to allow for different transition bands.
+    For 12 kHz, we don't skip anything, because the last band already skips
+     400 Hz.*/
+  if(rate==48000)max_compare=BANDS[NBANDS];
+  else if(rate==12000)max_compare=BANDS[ybands];
+  else max_compare=BANDS[ybands]-3;
+  err=0;
+  for(xi=0;xi<nframes;xi++){
+    double Ef;
+    Ef=0;
+    for(bi=0;bi<ybands;bi++){
+      double Eb;
+      Eb=0;
+      for(xj=BANDS[bi];xj<BANDS[bi+1]&&xj<max_compare;xj++){
+        for(ci=0;ci<nchannels;ci++){
+          float re;
+          float im;
+          re=Y[(xi*yfreqs+xj)*nchannels+ci]/X[(xi*NFREQS+xj)*nchannels+ci];
+          im=re-log(re)-1;
+          /*Make comparison less sensitive around the SILK/CELT cross-over to
+            allow for mode freedom in the filters.*/
+          if(xj>=79&&xj<=81)im*=0.1F;
+          if(xj==80)im*=0.1F;
+          Eb+=im;
+        }
+      }
+      Eb /= (BANDS[bi+1]-BANDS[bi])*nchannels;
+      Ef += Eb*Eb;
+    }
+    /*Using a fixed normalization value means we're willing to accept slightly
+       lower quality for lower sampling rates.*/
+    Ef/=NBANDS;
+    Ef*=Ef;
+    err+=Ef*Ef;
+  }
+  err=pow(err/nframes,1.0/16);
+  Q=100*(1-0.5*log(1+err)/log(1.13));
+  if(Q<0){
+    fprintf(stderr,"Test vector FAILS\n");
+    fprintf(stderr,"Internal weighted error is %f\n",err);
+    return EXIT_FAILURE;
+  }
+  else{
+    fprintf(stderr,"Test vector PASSES\n");
+    fprintf(stderr,
+     "Opus quality metric: %.1f %% (internal weighted error is %f)\n",Q,err);
+    return EXIT_SUCCESS;
+  }
+}
diff --git a/drivers/opus/opus_config.h b/drivers/opus/opus_config.h
new file mode 100644
index 0000000000..c6470e92c3
--- /dev/null
+++ b/drivers/opus/opus_config.h
@@ -0,0 +1,121 @@
+/* Opus configuration header */
+/* Based on the output of libopus configure script */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `lrint' function. */
+#define HAVE_LRINT 1
+
+/* Define to 1 if you have the `lrintf' function. */
+#define HAVE_LRINTF 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+#ifdef OPUS_ARM_OPT
+/* Make use of ARM asm optimization */
+#define OPUS_ARM_ASM 1
+
+/* Use generic ARMv4 inline asm optimizations */
+#define OPUS_ARM_INLINE_ASM 1
+
+/* Use ARMv5E inline asm optimizations */
+#define OPUS_ARM_INLINE_EDSP 1
+
+/* Use ARMv6 inline asm optimizations */
+#define OPUS_ARM_INLINE_MEDIA 1
+
+/* Use ARM NEON inline asm optimizations */
+#define OPUS_ARM_INLINE_NEON 1
+
+/* Define if assembler supports EDSP instructions */
+#define OPUS_ARM_MAY_HAVE_EDSP 1
+
+/* Define if assembler supports ARMv6 media instructions */
+#define OPUS_ARM_MAY_HAVE_MEDIA 1
+
+/* Define if compiler supports NEON instructions */
+#define OPUS_ARM_MAY_HAVE_NEON 1
+#endif // OPUS_ARM_OPT
+
+#ifdef OPUS_ARM64_OPT
+/* Make use of ARM asm optimization */
+#define OPUS_ARM_ASM 1
+
+/* Use ARMv6 inline asm optimizations */
+#define OPUS_ARM_INLINE_MEDIA 1 // work
+
+/* Use ARM NEON inline asm optimizations */
+#define OPUS_ARM_INLINE_NEON 1 // work
+
+/* Define if assembler supports EDSP instructions */
+#define OPUS_ARM_MAY_HAVE_EDSP 1 // work
+
+/* Define if assembler supports ARMv6 media instructions */
+#define OPUS_ARM_MAY_HAVE_MEDIA 1 // work
+
+/* Define if compiler supports NEON instructions */
+#define OPUS_ARM_MAY_HAVE_NEON 1
+
+#endif // OPUS_ARM64_OPT
+
+/* This is a build of OPUS */
+#define OPUS_BUILD /**/
+
+#ifndef WIN32
+	/* Use C99 variable-size arrays */
+	#define VAR_ARRAYS 1
+#else
+	/* Fixes VS 2013 compile error */
+	#define USE_ALLOCA 1
+#endif
+
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+/* #undef inline */
+#endif
+
+/* Define to the equivalent of the C99 'restrict' keyword, or to
+   nothing if this is not supported.  Do not define if restrict is
+   supported directly.  */
+#define restrict __restrict
+/* Work around a bug in Sun C++: it does not support _Restrict or
+   __restrict__, even though the corresponding Sun C compiler ends up with
+   "#define restrict _Restrict" or "#define restrict __restrict__" in the
+   previous line.  Perhaps some future version of Sun C++ will work with
+   restrict; if so, hopefully it defines __RESTRICT like Sun C does.  */
+#if defined __SUNPRO_CC && !defined __RESTRICT
+# define _Restrict
+# define __restrict__
+#endif
diff --git a/drivers/opus/opus_custom.h b/drivers/opus/opus_custom.h
new file mode 100644
index 0000000000..41f36bf2fb
--- /dev/null
+++ b/drivers/opus/opus_custom.h
@@ -0,0 +1,342 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008-2012 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+  @file opus_custom.h
+  @brief Opus-Custom reference implementation API
+ */
+
+#ifndef OPUS_CUSTOM_H
+#define OPUS_CUSTOM_H
+
+#include "opus_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CUSTOM_MODES
+# define OPUS_CUSTOM_EXPORT OPUS_EXPORT
+# define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT
+#else
+# define OPUS_CUSTOM_EXPORT
+# ifdef OPUS_BUILD
+#  define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE
+# else
+#  define OPUS_CUSTOM_EXPORT_STATIC
+# endif
+#endif
+
+/** @defgroup opus_custom Opus Custom
+  * @{
+  *  Opus Custom is an optional part of the Opus specification and
+  * reference implementation which uses a distinct API from the regular
+  * API and supports frame sizes that are not normally supported.\ Use
+  * of Opus Custom is discouraged for all but very special applications
+  * for which a frame size different from 2.5, 5, 10, or 20 ms is needed
+  * (for either complexity or latency reasons) and where interoperability
+  * is less important.
+  *
+  * In addition to the interoperability limitations the use of Opus custom
+  * disables a substantial chunk of the codec and generally lowers the
+  * quality available at a given bitrate. Normally when an application needs
+  * a different frame size from the codec it should buffer to match the
+  * sizes but this adds a small amount of delay which may be important
+  * in some very low latency applications. Some transports (especially
+  * constant rate RF transports) may also work best with frames of
+  * particular durations.
+  *
+  * Libopus only supports custom modes if they are enabled at compile time.
+  *
+  * The Opus Custom API is similar to the regular API but the
+  * @ref opus_encoder_create and @ref opus_decoder_create calls take
+  * an additional mode parameter which is a structure produced by
+  * a call to @ref opus_custom_mode_create. Both the encoder and decoder
+  * must create a mode using the same sample rate (fs) and frame size
+  * (frame size) so these parameters must either be signaled out of band
+  * or fixed in a particular implementation.
+  *
+  * Similar to regular Opus the custom modes support on the fly frame size
+  * switching, but the sizes available depend on the particular frame size in
+  * use. For some initial frame sizes on a single on the fly size is available.
+  */
+
+/** Contains the state of an encoder. One encoder state is needed
+    for each stream. It is initialized once at the beginning of the
+    stream. Do *not* re-initialize the state for every frame.
+   @brief Encoder state
+ */
+typedef struct OpusCustomEncoder OpusCustomEncoder;
+
+/** State of the decoder. One decoder state is needed for each stream.
+    It is initialized once at the beginning of the stream. Do *not*
+    re-initialize the state for every frame.
+   @brief Decoder state
+ */
+typedef struct OpusCustomDecoder OpusCustomDecoder;
+
+/** The mode contains all the information necessary to create an
+    encoder. Both the encoder and decoder need to be initialized
+    with exactly the same mode, otherwise the output will be
+    corrupted.
+   @brief Mode configuration
+ */
+typedef struct OpusCustomMode OpusCustomMode;
+
+/** Creates a new mode struct. This will be passed to an encoder or
+  * decoder. The mode MUST NOT BE DESTROYED until the encoders and
+  * decoders that use it are destroyed as well.
+  * @param [in] Fs <tt>int</tt>: Sampling rate (8000 to 96000 Hz)
+  * @param [in] frame_size <tt>int</tt>: Number of samples (per channel) to encode in each
+  *        packet (64 - 1024, prime factorization must contain zero or more 2s, 3s, or 5s and no other primes)
+  * @param [out] error <tt>int*</tt>: Returned error code (if NULL, no error will be returned)
+  * @return A newly created mode
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error);
+
+/** Destroys a mode struct. Only call this after all encoders and
+  * decoders using this mode are destroyed as well.
+  * @param [in] mode <tt>OpusCustomMode*</tt>: Mode to be freed.
+  */
+OPUS_CUSTOM_EXPORT void opus_custom_mode_destroy(OpusCustomMode *mode);
+
+
+#if !defined(OPUS_BUILD) || defined(CELT_ENCODER_C)
+
+/* Encoder */
+/** Gets the size of an OpusCustomEncoder structure.
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @returns size
+  */
+OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size(
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1);
+
+# ifdef CUSTOM_MODES
+/** Initializes a previously allocated encoder state
+  * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size.
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see opus_custom_encoder_create(),opus_custom_encoder_get_size()
+  * To reset a previously initialized state use the OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of
+  *  the stream (must be the same characteristics as used for the
+  *  decoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @return OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_init(
+    OpusCustomEncoder *st,
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+# endif
+#endif
+
+
+/** Creates a new encoder state. Each stream needs its own encoder
+  * state (can't be shared across simultaneous streams).
+  * @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of
+  *  the stream (must be the same characteristics as used for the
+  *  decoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @param [out] error <tt>int*</tt>: Returns an error code
+  * @return Newly created encoder state.
+*/
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encoder_create(
+    const OpusCustomMode *mode,
+    int channels,
+    int *error
+) OPUS_ARG_NONNULL(1);
+
+
+/** Destroys a an encoder state.
+  * @param[in] st <tt>OpusCustomEncoder*</tt>: State to be freed.
+  */
+OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st);
+
+/** Encodes a frame of audio.
+  * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>float*</tt>: PCM audio in float format, with a normal range of +/-1.0.
+  *          Samples with a range beyond +/-1.0 are supported but will
+  *          be clipped by decoders using the integer API and should
+  *          only be used if it is known that the far end supports
+  *          extended dynamic range. There must be exactly
+  *          frame_size samples per channel.
+  * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal
+  * @param [out] compressed <tt>char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long.
+  * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame
+  *          (can change from one frame to another)
+  * @return Number of bytes written to "compressed".
+  *       If negative, an error has occurred (see error codes). It is IMPORTANT that
+  *       the length returned be somehow transmitted to the decoder. Otherwise, no
+  *       decoding is possible.
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode_float(
+    OpusCustomEncoder *st,
+    const float *pcm,
+    int frame_size,
+    unsigned char *compressed,
+    int maxCompressedBytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes a frame of audio.
+  * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state
+  * @param [in] pcm <tt>opus_int16*</tt>: PCM audio in signed 16-bit format (native endian).
+  *          There must be exactly frame_size samples per channel.
+  * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal
+  * @param [out] compressed <tt>char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long.
+  * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame
+  *          (can change from one frame to another)
+  * @return Number of bytes written to "compressed".
+  *       If negative, an error has occurred (see error codes). It is IMPORTANT that
+  *       the length returned be somehow transmitted to the decoder. Otherwise, no
+  *       decoding is possible.
+ */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode(
+    OpusCustomEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *compressed,
+    int maxCompressedBytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus custom encoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @see opus_encoderctls
+  */
+OPUS_CUSTOM_EXPORT int opus_custom_encoder_ctl(OpusCustomEncoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1);
+
+
+#if !defined(OPUS_BUILD) || defined(CELT_DECODER_C)
+/* Decoder */
+
+/** Gets the size of an OpusCustomDecoder structure.
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @returns size
+  */
+OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_decoder_get_size(
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1);
+
+/** Initializes a previously allocated decoder state
+  * The memory pointed to by st must be the size returned by opus_custom_decoder_get_size.
+  * This is intended for applications which use their own allocator instead of malloc.
+  * @see opus_custom_decoder_create(),opus_custom_decoder_get_size()
+  * To reset a previously initialized state use the OPUS_RESET_STATE CTL.
+  * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+  * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of
+  *  the stream (must be the same characteristics as used for the
+  *  encoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @return OPUS_OK Success or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT_STATIC int opus_custom_decoder_init(
+    OpusCustomDecoder *st,
+    const OpusCustomMode *mode,
+    int channels
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2);
+
+#endif
+
+
+/** Creates a new decoder state. Each stream needs its own decoder state (can't
+  * be shared across simultaneous streams).
+  * @param [in] mode <tt>OpusCustomMode</tt>: Contains all the information about the characteristics of the
+  *          stream (must be the same characteristics as used for the encoder)
+  * @param [in] channels <tt>int</tt>: Number of channels
+  * @param [out] error <tt>int*</tt>: Returns an error code
+  * @return Newly created decoder state.
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decoder_create(
+    const OpusCustomMode *mode,
+    int channels,
+    int *error
+) OPUS_ARG_NONNULL(1);
+
+/** Destroys a an decoder state.
+  * @param[in] st <tt>OpusCustomDecoder*</tt>: State to be freed.
+  */
+OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st);
+
+/** Decode an opus custom frame with floating point output
+  * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>int</tt>: Number of bytes in payload
+  * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(float)
+  * @param [in] frame_size Number of samples per channel of available space in *pcm.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode_float(
+    OpusCustomDecoder *st,
+    const unsigned char *data,
+    int len,
+    float *pcm,
+    int frame_size
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode an opus custom frame
+  * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state
+  * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
+  * @param [in] len <tt>int</tt>: Number of bytes in payload
+  * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length
+  *  is frame_size*channels*sizeof(opus_int16)
+  * @param [in] frame_size Number of samples per channel of available space in *pcm.
+  * @returns Number of decoded samples or @ref opus_errorcodes
+  */
+OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode(
+    OpusCustomDecoder *st,
+    const unsigned char *data,
+    int len,
+    opus_int16 *pcm,
+    int frame_size
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on an Opus custom decoder.
+  *
+  * Generally the request and subsequent arguments are generated
+  * by a convenience macro.
+  * @see opus_genericctls
+  */
+OPUS_CUSTOM_EXPORT int opus_custom_decoder_ctl(OpusCustomDecoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_CUSTOM_H */
diff --git a/drivers/opus/opus_decoder.c b/drivers/opus/opus_decoder.c
new file mode 100644
index 0000000000..c5d4cc6aaa
--- /dev/null
+++ b/drivers/opus/opus_decoder.c
@@ -0,0 +1,970 @@
+/* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+# include "opus_config.h"
+#endif
+
+#ifndef OPUS_BUILD
+# error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details."
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+# pragma message "You appear to be compiling without optimization, if so opus will be very slow."
+#endif
+
+#include <stdarg.h>
+#include "celt.h"
+#include "opus.h"
+#include "entdec.h"
+#include "opus_modes.h"
+#include "API.h"
+#include "stack_alloc.h"
+#include "float_cast.h"
+#include "opus_private.h"
+#include "os_support.h"
+#include "structs.h"
+#include "define.h"
+#include "mathops.h"
+#include "cpu_support.h"
+
+struct OpusDecoder {
+   int          celt_dec_offset;
+   int          silk_dec_offset;
+   int          channels;
+   opus_int32   Fs;          /** Sampling rate (at the API level) */
+   silk_DecControlStruct DecControl;
+   int          decode_gain;
+
+   /* Everything beyond this point gets cleared on a reset */
+#define OPUS_DECODER_RESET_START stream_channels
+   int          stream_channels;
+
+   int          bandwidth;
+   int          mode;
+   int          prev_mode;
+   int          frame_size;
+   int          prev_redundancy;
+   int          last_packet_duration;
+#ifndef OPUS_FIXED_POINT
+   opus_val16   softclip_mem[2];
+#endif
+
+   opus_uint32  rangeFinal;
+};
+
+#ifdef OPUS_FIXED_POINT
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+   return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
+}
+#endif
+
+
+int opus_decoder_get_size(int channels)
+{
+   int silkDecSizeBytes, celtDecSizeBytes;
+   int ret;
+   if (channels<1 || channels > 2)
+      return 0;
+   ret = silk_Get_Decoder_Size( &silkDecSizeBytes );
+   if(ret)
+      return 0;
+   silkDecSizeBytes = align(silkDecSizeBytes);
+   celtDecSizeBytes = celt_decoder_get_size(channels);
+   return align(sizeof(OpusDecoder))+silkDecSizeBytes+celtDecSizeBytes;
+}
+
+int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
+{
+   void *silk_dec;
+   CELTDecoder *celt_dec;
+   int ret, silkDecSizeBytes;
+
+   if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)
+    || (channels!=1&&channels!=2))
+      return OPUS_BAD_ARG;
+
+   OPUS_CLEAR((char*)st, opus_decoder_get_size(channels));
+   /* Initialize SILK encoder */
+   ret = silk_Get_Decoder_Size(&silkDecSizeBytes);
+   if (ret)
+      return OPUS_INTERNAL_ERROR;
+
+   silkDecSizeBytes = align(silkDecSizeBytes);
+   st->silk_dec_offset = align(sizeof(OpusDecoder));
+   st->celt_dec_offset = st->silk_dec_offset+silkDecSizeBytes;
+   silk_dec = (char*)st+st->silk_dec_offset;
+   celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+   st->stream_channels = st->channels = channels;
+
+   st->Fs = Fs;
+   st->DecControl.API_sampleRate = st->Fs;
+   st->DecControl.nChannelsAPI      = st->channels;
+
+   /* Reset decoder */
+   ret = silk_InitDecoder( silk_dec );
+   if(ret)return OPUS_INTERNAL_ERROR;
+
+   /* Initialize CELT decoder */
+   ret = celt_decoder_init(celt_dec, Fs, channels);
+   if(ret!=OPUS_OK)return OPUS_INTERNAL_ERROR;
+
+   celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0));
+
+   st->prev_mode = 0;
+   st->frame_size = Fs/400;
+   return OPUS_OK;
+}
+
+OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error)
+{
+   int ret;
+   OpusDecoder *st;
+   if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)
+    || (channels!=1&&channels!=2))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusDecoder *)opus_alloc(opus_decoder_get_size(channels));
+   if (st == NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_decoder_init(st, Fs, channels);
+   if (error)
+      *error = ret;
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   return st;
+}
+
+static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
+      opus_val16 *out, int overlap, int channels,
+      const opus_val16 *window, opus_int32 Fs)
+{
+   int i, c;
+   int inc = 48000/Fs;
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<overlap;i++)
+      {
+         opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+         out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]),
+                                   Q15ONE-w, in1[i*channels+c]), 15);
+      }
+   }
+}
+
+static int opus_packet_get_mode(const unsigned char *data)
+{
+   int mode;
+   if (data[0]&0x80)
+   {
+      mode = MODE_CELT_ONLY;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      mode = MODE_HYBRID;
+   } else {
+      mode = MODE_SILK_ONLY;
+   }
+   return mode;
+}
+
+static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+   void *silk_dec;
+   CELTDecoder *celt_dec;
+   int i, silk_ret=0, celt_ret=0;
+   ec_dec dec;
+   opus_int32 silk_frame_size;
+   int pcm_silk_size;
+   VARDECL(opus_int16, pcm_silk);
+   int pcm_transition_silk_size;
+   VARDECL(opus_val16, pcm_transition_silk);
+   int pcm_transition_celt_size;
+   VARDECL(opus_val16, pcm_transition_celt);
+   opus_val16 *pcm_transition;
+   int redundant_audio_size;
+   VARDECL(opus_val16, redundant_audio);
+
+   int audiosize;
+   int mode;
+   int transition=0;
+   int start_band;
+   int redundancy=0;
+   int redundancy_bytes = 0;
+   int celt_to_silk=0;
+   int c;
+   int F2_5, F5, F10, F20;
+   const opus_val16 *window;
+   opus_uint32 redundant_rng = 0;
+   ALLOC_STACK;
+
+   silk_dec = (char*)st+st->silk_dec_offset;
+   celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+   F20 = st->Fs/50;
+   F10 = F20>>1;
+   F5 = F10>>1;
+   F2_5 = F5>>1;
+   if (frame_size < F2_5)
+   {
+      RESTORE_STACK;
+      return OPUS_BUFFER_TOO_SMALL;
+   }
+   /* Limit frame_size to avoid excessive stack allocations. */
+   frame_size = IMIN(frame_size, st->Fs/25*3);
+   /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */
+   if (len<=1)
+   {
+      data = NULL;
+      /* In that case, don't conceal more than what the ToC says */
+      frame_size = IMIN(frame_size, st->frame_size);
+   }
+   if (data != NULL)
+   {
+      audiosize = st->frame_size;
+      mode = st->mode;
+      ec_dec_init(&dec,(unsigned char*)data,len);
+   } else {
+      audiosize = frame_size;
+      mode = st->prev_mode;
+
+      if (mode == 0)
+      {
+         /* If we haven't got any packet yet, all we can do is return zeros */
+         for (i=0;i<audiosize*st->channels;i++)
+            pcm[i] = 0;
+         RESTORE_STACK;
+         return audiosize;
+      }
+
+      /* Avoids trying to run the PLC on sizes other than 2.5 (CELT), 5 (CELT),
+         10, or 20 (e.g. 12.5 or 30 ms). */
+      if (audiosize > F20)
+      {
+         do {
+            int ret = opus_decode_frame(st, NULL, 0, pcm, IMIN(audiosize, F20), 0);
+            if (ret<0)
+            {
+               RESTORE_STACK;
+               return ret;
+            }
+            pcm += ret*st->channels;
+            audiosize -= ret;
+         } while (audiosize > 0);
+         RESTORE_STACK;
+         return frame_size;
+      } else if (audiosize < F20)
+      {
+         if (audiosize > F10)
+            audiosize = F10;
+         else if (mode != MODE_SILK_ONLY && audiosize > F5 && audiosize < F10)
+            audiosize = F5;
+      }
+   }
+
+   pcm_transition_silk_size = ALLOC_NONE;
+   pcm_transition_celt_size = ALLOC_NONE;
+   if (data!=NULL && st->prev_mode > 0 && (
+       (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy)
+    || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) )
+      )
+   {
+      transition = 1;
+      /* Decide where to allocate the stack memory for pcm_transition */
+      if (mode == MODE_CELT_ONLY)
+         pcm_transition_celt_size = F5*st->channels;
+      else
+         pcm_transition_silk_size = F5*st->channels;
+   }
+   ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16);
+   if (transition && mode == MODE_CELT_ONLY)
+   {
+      pcm_transition = pcm_transition_celt;
+      opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+   }
+   if (audiosize > frame_size)
+   {
+      /*fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);*/
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   } else {
+      frame_size = audiosize;
+   }
+
+   /* Don't allocate any memory when in CELT-only mode */
+   pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
+   ALLOC(pcm_silk, pcm_silk_size, opus_int16);
+
+   /* SILK processing */
+   if (mode != MODE_CELT_ONLY)
+   {
+      int lost_flag, decoded_samples;
+      opus_int16 *pcm_ptr = pcm_silk;
+
+      if (st->prev_mode==MODE_CELT_ONLY)
+         silk_InitDecoder( silk_dec );
+
+      /* The SILK PLC cannot produce frames of less than 10 ms */
+      st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
+
+      if (data != NULL)
+      {
+        st->DecControl.nChannelsInternal = st->stream_channels;
+        if( mode == MODE_SILK_ONLY ) {
+           if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) {
+              st->DecControl.internalSampleRate = 8000;
+           } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) {
+              st->DecControl.internalSampleRate = 12000;
+           } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) {
+              st->DecControl.internalSampleRate = 16000;
+           } else {
+              st->DecControl.internalSampleRate = 16000;
+              silk_assert( 0 );
+           }
+        } else {
+           /* Hybrid mode */
+           st->DecControl.internalSampleRate = 16000;
+        }
+     }
+
+     lost_flag = data == NULL ? 1 : 2 * decode_fec;
+     decoded_samples = 0;
+     do {
+        /* Call SILK decoder */
+        int first_frame = decoded_samples == 0;
+        silk_ret = silk_Decode( silk_dec, &st->DecControl,
+                                lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size );
+        if( silk_ret ) {
+           if (lost_flag) {
+              /* PLC failure should not be fatal */
+              silk_frame_size = frame_size;
+              for (i=0;i<frame_size*st->channels;i++)
+                 pcm_ptr[i] = 0;
+           } else {
+             RESTORE_STACK;
+             return OPUS_INTERNAL_ERROR;
+           }
+        }
+        pcm_ptr += silk_frame_size * st->channels;
+        decoded_samples += silk_frame_size;
+      } while( decoded_samples < frame_size );
+   }
+
+   start_band = 0;
+   if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL
+    && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len)
+   {
+      /* Check if we have a redundant 0-8 kHz band */
+      if (mode == MODE_HYBRID)
+         redundancy = ec_dec_bit_logp(&dec, 12);
+      else
+         redundancy = 1;
+      if (redundancy)
+      {
+         celt_to_silk = ec_dec_bit_logp(&dec, 1);
+         /* redundancy_bytes will be at least two, in the non-hybrid
+            case due to the ec_tell() check above */
+         redundancy_bytes = mode==MODE_HYBRID ?
+               (opus_int32)ec_dec_uint(&dec, 256)+2 :
+               len-((ec_tell(&dec)+7)>>3);
+         len -= redundancy_bytes;
+         /* This is a sanity check. It should never happen for a valid
+            packet, so the exact behaviour is not normative. */
+         if (len*8 < ec_tell(&dec))
+         {
+            len = 0;
+            redundancy_bytes = 0;
+            redundancy = 0;
+         }
+         /* Shrink decoder because of raw bits */
+         dec.storage -= redundancy_bytes;
+      }
+   }
+   if (mode != MODE_CELT_ONLY)
+      start_band = 17;
+
+   {
+      int endband=21;
+
+      switch(st->bandwidth)
+      {
+      case OPUS_BANDWIDTH_NARROWBAND:
+         endband = 13;
+         break;
+      case OPUS_BANDWIDTH_MEDIUMBAND:
+      case OPUS_BANDWIDTH_WIDEBAND:
+         endband = 17;
+         break;
+      case OPUS_BANDWIDTH_SUPERWIDEBAND:
+         endband = 19;
+         break;
+      case OPUS_BANDWIDTH_FULLBAND:
+         endband = 21;
+         break;
+      }
+      celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband));
+      celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels));
+   }
+
+   if (redundancy)
+   {
+      transition = 0;
+      pcm_transition_silk_size=ALLOC_NONE;
+   }
+
+   ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16);
+
+   if (transition && mode != MODE_CELT_ONLY)
+   {
+      pcm_transition = pcm_transition_silk;
+      opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+   }
+
+   /* Only allocation memory for redundancy if/when needed */
+   redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE;
+   ALLOC(redundant_audio, redundant_audio_size, opus_val16);
+
+   /* 5 ms redundant frame for CELT->SILK*/
+   if (redundancy && celt_to_silk)
+   {
+      celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes,
+                          redundant_audio, F5, NULL);
+      celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
+   }
+
+   /* MUST be after PLC */
+   celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band));
+
+   if (mode != MODE_SILK_ONLY)
+   {
+      int celt_frame_size = IMIN(F20, frame_size);
+      /* Make sure to discard any previous CELT state */
+      if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy)
+         celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+      /* Decode CELT */
+      celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,
+                                     len, pcm, celt_frame_size, &dec);
+   } else {
+      unsigned char silence[2] = {0xFF, 0xFF};
+      for (i=0;i<frame_size*st->channels;i++)
+         pcm[i] = 0;
+      /* For hybrid -> SILK transitions, we let the CELT MDCT
+         do a fade-out by decoding a silence frame */
+      if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) )
+      {
+         celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+         celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL);
+      }
+   }
+
+   if (mode != MODE_CELT_ONLY)
+   {
+#ifdef OPUS_FIXED_POINT
+      for (i=0;i<frame_size*st->channels;i++)
+         pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
+#else
+      for (i=0;i<frame_size*st->channels;i++)
+         pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
+#endif
+   }
+
+   {
+      const CELTMode *celt_mode;
+      celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode));
+      window = celt_mode->window;
+   }
+
+   /* 5 ms redundant frame for SILK->CELT */
+   if (redundancy && !celt_to_silk)
+   {
+      celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+      celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
+
+      celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL);
+      celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
+      smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
+                  pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
+   }
+   if (redundancy && celt_to_silk)
+   {
+      for (c=0;c<st->channels;c++)
+      {
+         for (i=0;i<F2_5;i++)
+            pcm[st->channels*i+c] = redundant_audio[st->channels*i+c];
+      }
+      smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5,
+                  pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs);
+   }
+   if (transition)
+   {
+      if (audiosize >= F5)
+      {
+         for (i=0;i<st->channels*F2_5;i++)
+            pcm[i] = pcm_transition[i];
+         smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5,
+                     pcm+st->channels*F2_5, F2_5,
+                     st->channels, window, st->Fs);
+      } else {
+         /* Not enough time to do a clean transition, but we do it anyway
+            This will not preserve amplitude perfectly and may introduce
+            a bit of temporal aliasing, but it shouldn't be too bad and
+            that's pretty much the best we can do. In any case, generating this
+            transition it pretty silly in the first place */
+         smooth_fade(pcm_transition, pcm,
+                     pcm, F2_5,
+                     st->channels, window, st->Fs);
+      }
+   }
+
+   if(st->decode_gain)
+   {
+      opus_val32 gain;
+      gain = celt_exp2(MULT16_16_P15(QCONST16(6.48814081e-4f, 25), st->decode_gain));
+      for (i=0;i<frame_size*st->channels;i++)
+      {
+         opus_val32 x;
+         x = MULT16_32_P16(pcm[i],gain);
+         pcm[i] = SATURATE(x, 32767);
+      }
+   }
+
+   if (len <= 1)
+      st->rangeFinal = 0;
+   else
+      st->rangeFinal = dec.rng ^ redundant_rng;
+
+   st->prev_mode = mode;
+   st->prev_redundancy = redundancy && !celt_to_silk;
+
+   if (celt_ret>=0)
+   {
+      if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels))
+         OPUS_PRINT_INT(audiosize);
+   }
+
+   RESTORE_STACK;
+   return celt_ret < 0 ? celt_ret : audiosize;
+
+}
+
+int opus_decode_native(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec,
+      int self_delimited, opus_int32 *packet_offset, int soft_clip)
+{
+   int i, nb_samples;
+   int count, offset;
+   unsigned char toc;
+   int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels;
+   /* 48 x 2.5 ms = 120 ms */
+   opus_int16 size[48];
+   if (decode_fec<0 || decode_fec>1)
+      return OPUS_BAD_ARG;
+   /* For FEC/PLC, frame_size has to be to have a multiple of 2.5 ms */
+   if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0)
+      return OPUS_BAD_ARG;
+   if (len==0 || data==NULL)
+   {
+      int pcm_count=0;
+      do {
+         int ret;
+         ret = opus_decode_frame(st, NULL, 0, pcm+pcm_count*st->channels, frame_size-pcm_count, 0);
+         if (ret<0)
+            return ret;
+         pcm_count += ret;
+      } while (pcm_count < frame_size);
+      celt_assert(pcm_count == frame_size);
+      if (OPUS_CHECK_ARRAY(pcm, pcm_count*st->channels))
+         OPUS_PRINT_INT(pcm_count);
+      st->last_packet_duration = pcm_count;
+      return pcm_count;
+   } else if (len<0)
+      return OPUS_BAD_ARG;
+
+   packet_mode = opus_packet_get_mode(data);
+   packet_bandwidth = opus_packet_get_bandwidth(data);
+   packet_frame_size = opus_packet_get_samples_per_frame(data, st->Fs);
+   packet_stream_channels = opus_packet_get_nb_channels(data);
+
+   count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL,
+                                  size, &offset, packet_offset);
+   if (count<0)
+      return count;
+
+   data += offset;
+
+   if (decode_fec)
+   {
+      int duration_copy;
+      int ret;
+      /* If no FEC can be present, run the PLC (recursive call) */
+      if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY)
+         return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip);
+      /* Otherwise, run the PLC on everything except the size for which we might have FEC */
+      duration_copy = st->last_packet_duration;
+      if (frame_size-packet_frame_size!=0)
+      {
+         ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip);
+         if (ret<0)
+         {
+            st->last_packet_duration = duration_copy;
+            return ret;
+         }
+         celt_assert(ret==frame_size-packet_frame_size);
+      }
+      /* Complete with FEC */
+      st->mode = packet_mode;
+      st->bandwidth = packet_bandwidth;
+      st->frame_size = packet_frame_size;
+      st->stream_channels = packet_stream_channels;
+      ret = opus_decode_frame(st, data, size[0], pcm+st->channels*(frame_size-packet_frame_size),
+            packet_frame_size, 1);
+      if (ret<0)
+         return ret;
+      else {
+         if (OPUS_CHECK_ARRAY(pcm, frame_size*st->channels))
+            OPUS_PRINT_INT(frame_size);
+         st->last_packet_duration = frame_size;
+         return frame_size;
+      }
+   }
+
+   if (count*packet_frame_size > frame_size)
+      return OPUS_BUFFER_TOO_SMALL;
+
+   /* Update the state as the last step to avoid updating it on an invalid packet */
+   st->mode = packet_mode;
+   st->bandwidth = packet_bandwidth;
+   st->frame_size = packet_frame_size;
+   st->stream_channels = packet_stream_channels;
+
+   nb_samples=0;
+   for (i=0;i<count;i++)
+   {
+      int ret;
+      ret = opus_decode_frame(st, data, size[i], pcm+nb_samples*st->channels, frame_size-nb_samples, 0);
+      if (ret<0)
+         return ret;
+      celt_assert(ret==packet_frame_size);
+      data += size[i];
+      nb_samples += ret;
+   }
+   st->last_packet_duration = nb_samples;
+   if (OPUS_CHECK_ARRAY(pcm, nb_samples*st->channels))
+      OPUS_PRINT_INT(nb_samples);
+#ifndef OPUS_FIXED_POINT
+   if (soft_clip)
+      opus_pcm_soft_clip(pcm, nb_samples, st->channels, st->softclip_mem);
+   else
+      st->softclip_mem[0]=st->softclip_mem[1]=0;
+#endif
+   return nb_samples;
+}
+
+#ifdef OPUS_FIXED_POINT
+
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+   if(frame_size<=0)
+      return OPUS_BAD_ARG;
+   return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_decode_float(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, float *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(opus_int16, out);
+   int ret, i;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   ALLOC(out, frame_size*st->channels, opus_int16);
+
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0);
+   if (ret > 0)
+   {
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = (1.f/32768.f)*(out[i]);
+   }
+   RESTORE_STACK;
+   return ret;
+}
+#endif
+
+
+#else
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(float, out);
+   int ret, i;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
+   ALLOC(out, frame_size*st->channels, float);
+
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1);
+   if (ret > 0)
+   {
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = FLOAT2INT16(out[i]);
+   }
+   RESTORE_STACK;
+   return ret;
+}
+
+int opus_decode_float(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{
+   if(frame_size<=0)
+      return OPUS_BAD_ARG;
+   return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0);
+}
+
+#endif
+
+int opus_decoder_ctl(OpusDecoder *st, int request, ...)
+{
+   int ret = OPUS_OK;
+   va_list ap;
+   void *silk_dec;
+   CELTDecoder *celt_dec;
+
+   silk_dec = (char*)st+st->silk_dec_offset;
+   celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
+
+
+   va_start(ap, request);
+
+   switch (request)
+   {
+   case OPUS_GET_BANDWIDTH_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->bandwidth;
+   }
+   break;
+   case OPUS_GET_FINAL_RANGE_REQUEST:
+   {
+      opus_uint32 *value = va_arg(ap, opus_uint32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->rangeFinal;
+   }
+   break;
+   case OPUS_RESET_STATE:
+   {
+      OPUS_CLEAR((char*)&st->OPUS_DECODER_RESET_START,
+            sizeof(OpusDecoder)-
+            ((char*)&st->OPUS_DECODER_RESET_START - (char*)st));
+
+      celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
+      silk_InitDecoder( silk_dec );
+      st->stream_channels = st->channels;
+      st->frame_size = st->Fs/400;
+   }
+   break;
+   case OPUS_GET_SAMPLE_RATE_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->Fs;
+   }
+   break;
+   case OPUS_GET_PITCH_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      if (st->prev_mode == MODE_CELT_ONLY)
+         celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value));
+      else
+         *value = st->DecControl.prevPitchLag;
+   }
+   break;
+   case OPUS_GET_GAIN_REQUEST:
+   {
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->decode_gain;
+   }
+   break;
+   case OPUS_SET_GAIN_REQUEST:
+   {
+       opus_int32 value = va_arg(ap, opus_int32);
+       if (value<-32768 || value>32767)
+       {
+          goto bad_arg;
+       }
+       st->decode_gain = value;
+   }
+   break;
+   case OPUS_GET_LAST_PACKET_DURATION_REQUEST:
+   {
+      opus_uint32 *value = va_arg(ap, opus_uint32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = st->last_packet_duration;
+   }
+   break;
+   default:
+      /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/
+      ret = OPUS_UNIMPLEMENTED;
+      break;
+   }
+
+   va_end(ap);
+   return ret;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+}
+
+void opus_decoder_destroy(OpusDecoder *st)
+{
+   opus_free(st);
+}
+
+
+int opus_packet_get_bandwidth(const unsigned char *data)
+{
+   int bandwidth;
+   if (data[0]&0x80)
+   {
+      bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3);
+      if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+         bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND :
+                                   OPUS_BANDWIDTH_SUPERWIDEBAND;
+   } else {
+      bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3);
+   }
+   return bandwidth;
+}
+
+int opus_packet_get_samples_per_frame(const unsigned char *data,
+      opus_int32 Fs)
+{
+   int audiosize;
+   if (data[0]&0x80)
+   {
+      audiosize = ((data[0]>>3)&0x3);
+      audiosize = (Fs<<audiosize)/400;
+   } else if ((data[0]&0x60) == 0x60)
+   {
+      audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
+   } else {
+      audiosize = ((data[0]>>3)&0x3);
+      if (audiosize == 3)
+         audiosize = Fs*60/1000;
+      else
+         audiosize = (Fs<<audiosize)/100;
+   }
+   return audiosize;
+}
+
+int opus_packet_get_nb_channels(const unsigned char *data)
+{
+   return (data[0]&0x4) ? 2 : 1;
+}
+
+int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len)
+{
+   int count;
+   if (len<1)
+      return OPUS_BAD_ARG;
+   count = packet[0]&0x3;
+   if (count==0)
+      return 1;
+   else if (count!=3)
+      return 2;
+   else if (len<2)
+      return OPUS_INVALID_PACKET;
+   else
+      return packet[1]&0x3F;
+}
+
+int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len,
+      opus_int32 Fs)
+{
+   int samples;
+   int count = opus_packet_get_nb_frames(packet, len);
+
+   if (count<0)
+      return count;
+
+   samples = count*opus_packet_get_samples_per_frame(packet, Fs);
+   /* Can't have more than 120 ms */
+   if (samples*25 > Fs*3)
+      return OPUS_INVALID_PACKET;
+   else
+      return samples;
+}
+
+int opus_decoder_get_nb_samples(const OpusDecoder *dec,
+      const unsigned char packet[], opus_int32 len)
+{
+   return opus_packet_get_nb_samples(packet, len, dec->Fs);
+}
diff --git a/drivers/opus/opus_defines.h b/drivers/opus/opus_defines.h
new file mode 100644
index 0000000000..265089f65e
--- /dev/null
+++ b/drivers/opus/opus_defines.h
@@ -0,0 +1,726 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus_defines.h
+ * @brief Opus reference implementation constants
+ */
+
+#ifndef OPUS_DEFINES_H
+#define OPUS_DEFINES_H
+
+#include "opus_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @defgroup opus_errorcodes Error codes
+ * @{
+ */
+/** No error @hideinitializer*/
+#define OPUS_OK                0
+/** One or more invalid/out of range arguments @hideinitializer*/
+#define OPUS_BAD_ARG          -1
+/** The mode struct passed is invalid @hideinitializer*/
+#define OPUS_BUFFER_TOO_SMALL -2
+/** An internal error was detected @hideinitializer*/
+#define OPUS_INTERNAL_ERROR   -3
+/** The compressed data passed is corrupted @hideinitializer*/
+#define OPUS_INVALID_PACKET   -4
+/** Invalid/unsupported request number @hideinitializer*/
+#define OPUS_UNIMPLEMENTED    -5
+/** An encoder or decoder structure is invalid or already freed @hideinitializer*/
+#define OPUS_INVALID_STATE    -6
+/** Memory allocation has failed @hideinitializer*/
+#define OPUS_ALLOC_FAIL       -7
+/**@}*/
+
+/** @cond OPUS_INTERNAL_DOC */
+/**Export control for opus functions */
+
+#ifndef OPUS_EXPORT
+# if defined(WIN32)
+#  ifdef OPUS_BUILD
+#   define OPUS_EXPORT __declspec(dllexport)
+#  else
+#   define OPUS_EXPORT
+#  endif
+# elif defined(__GNUC__) && defined(OPUS_BUILD)
+#  define OPUS_EXPORT __attribute__ ((visibility ("default")))
+# else
+#  define OPUS_EXPORT
+# endif
+#endif
+
+# if !defined(OPUS_GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define OPUS_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define OPUS_GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(3,0)
+#  define OPUS_RESTRICT __restrict__
+# elif (defined(_MSC_VER) && _MSC_VER >= 1400)
+#  define OPUS_RESTRICT __restrict
+# else
+#  define OPUS_RESTRICT
+# endif
+#else
+# define OPUS_RESTRICT restrict
+#endif
+
+#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+# if OPUS_GNUC_PREREQ(2,7)
+#  define OPUS_INLINE __inline__
+# elif (defined(_MSC_VER))
+#  define OPUS_INLINE __inline
+# else
+#  define OPUS_INLINE
+# endif
+#else
+# define OPUS_INLINE inline
+#endif
+
+/**Warning attributes for opus functions
+  * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out
+  * some paranoid null checks. */
+#if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4)
+# define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
+#else
+# define OPUS_WARN_UNUSED_RESULT
+#endif
+#if !defined(OPUS_BUILD) && defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4)
+# define OPUS_ARG_NONNULL(_x)  __attribute__ ((__nonnull__(_x)))
+#else
+# define OPUS_ARG_NONNULL(_x)
+#endif
+
+/** These are the actual Encoder CTL ID numbers.
+  * They should not be used directly by applications.
+  * In general, SETs should be even and GETs should be odd.*/
+#define OPUS_SET_APPLICATION_REQUEST         4000
+#define OPUS_GET_APPLICATION_REQUEST         4001
+#define OPUS_SET_BITRATE_REQUEST             4002
+#define OPUS_GET_BITRATE_REQUEST             4003
+#define OPUS_SET_MAX_BANDWIDTH_REQUEST       4004
+#define OPUS_GET_MAX_BANDWIDTH_REQUEST       4005
+#define OPUS_SET_VBR_REQUEST                 4006
+#define OPUS_GET_VBR_REQUEST                 4007
+#define OPUS_SET_BANDWIDTH_REQUEST           4008
+#define OPUS_GET_BANDWIDTH_REQUEST           4009
+#define OPUS_SET_COMPLEXITY_REQUEST          4010
+#define OPUS_GET_COMPLEXITY_REQUEST          4011
+#define OPUS_SET_INBAND_FEC_REQUEST          4012
+#define OPUS_GET_INBAND_FEC_REQUEST          4013
+#define OPUS_SET_PACKET_LOSS_PERC_REQUEST    4014
+#define OPUS_GET_PACKET_LOSS_PERC_REQUEST    4015
+#define OPUS_SET_DTX_REQUEST                 4016
+#define OPUS_GET_DTX_REQUEST                 4017
+#define OPUS_SET_VBR_CONSTRAINT_REQUEST      4020
+#define OPUS_GET_VBR_CONSTRAINT_REQUEST      4021
+#define OPUS_SET_FORCE_CHANNELS_REQUEST      4022
+#define OPUS_GET_FORCE_CHANNELS_REQUEST      4023
+#define OPUS_SET_SIGNAL_REQUEST              4024
+#define OPUS_GET_SIGNAL_REQUEST              4025
+#define OPUS_GET_LOOKAHEAD_REQUEST           4027
+/* #define OPUS_RESET_STATE 4028 */
+#define OPUS_GET_SAMPLE_RATE_REQUEST         4029
+#define OPUS_GET_FINAL_RANGE_REQUEST         4031
+#define OPUS_GET_PITCH_REQUEST               4033
+#define OPUS_SET_GAIN_REQUEST                4034
+#define OPUS_GET_GAIN_REQUEST                4045 /* Should have been 4035 */
+#define OPUS_SET_LSB_DEPTH_REQUEST           4036
+#define OPUS_GET_LSB_DEPTH_REQUEST           4037
+#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
+#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
+#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
+#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042
+#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043
+
+/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
+
+/* Macros to trigger compilation errors when the wrong types are provided to a CTL */
+#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
+#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr)))
+#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr)))
+#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr)))
+/** @endcond */
+
+/** @defgroup opus_ctlvalues Pre-defined values for CTL interface
+  * @see opus_genericctls, opus_encoderctls
+  * @{
+  */
+/* Values for the various encoder CTLs */
+#define OPUS_AUTO                           -1000 /**<Auto/default setting @hideinitializer*/
+#define OPUS_BITRATE_MAX                       -1 /**<Maximum bitrate @hideinitializer*/
+
+/** Best for most VoIP/videoconference applications where listening quality and intelligibility matter most
+ * @hideinitializer */
+#define OPUS_APPLICATION_VOIP                2048
+/** Best for broadcast/high-fidelity application where the decoded audio should be as close as possible to the input
+ * @hideinitializer */
+#define OPUS_APPLICATION_AUDIO               2049
+/** Only use when lowest-achievable latency is what matters most. Voice-optimized modes cannot be used.
+ * @hideinitializer */
+#define OPUS_APPLICATION_RESTRICTED_LOWDELAY 2051
+
+#define OPUS_SIGNAL_VOICE                    3001 /**< Signal being encoded is voice */
+#define OPUS_SIGNAL_MUSIC                    3002 /**< Signal being encoded is music */
+#define OPUS_BANDWIDTH_NARROWBAND            1101 /**< 4 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_MEDIUMBAND            1102 /**< 6 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_WIDEBAND              1103 /**< 8 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_SUPERWIDEBAND         1104 /**<12 kHz bandpass @hideinitializer*/
+#define OPUS_BANDWIDTH_FULLBAND              1105 /**<20 kHz bandpass @hideinitializer*/
+
+#define OPUS_FRAMESIZE_ARG                   5000 /**< Select frame size from the argument (default) */
+#define OPUS_FRAMESIZE_2_5_MS                5001 /**< Use 2.5 ms frames */
+#define OPUS_FRAMESIZE_5_MS                  5002 /**< Use 5 ms frames */
+#define OPUS_FRAMESIZE_10_MS                 5003 /**< Use 10 ms frames */
+#define OPUS_FRAMESIZE_20_MS                 5004 /**< Use 20 ms frames */
+#define OPUS_FRAMESIZE_40_MS                 5005 /**< Use 40 ms frames */
+#define OPUS_FRAMESIZE_60_MS                 5006 /**< Use 60 ms frames */
+
+/**@}*/
+
+
+/** @defgroup opus_encoderctls Encoder related CTLs
+  *
+  * These are convenience macros for use with the \c opus_encode_ctl
+  * interface. They are used to generate the appropriate series of
+  * arguments for that call, passing the correct type, size and so
+  * on as expected for each particular request.
+  *
+  * Some usage examples:
+  *
+  * @code
+  * int ret;
+  * ret = opus_encoder_ctl(enc_ctx, OPUS_SET_BANDWIDTH(OPUS_AUTO));
+  * if (ret != OPUS_OK) return ret;
+  *
+  * opus_int32 rate;
+  * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&rate));
+  *
+  * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE);
+  * @endcode
+  *
+  * @see opus_genericctls, opus_encoder
+  * @{
+  */
+
+/** Configures the encoder's computational complexity.
+  * The supported range is 0-10 inclusive with 10 representing the highest complexity.
+  * @see OPUS_GET_COMPLEXITY
+  * @param[in] x <tt>opus_int32</tt>: Allowed values: 0-10, inclusive.
+  *
+  * @hideinitializer */
+#define OPUS_SET_COMPLEXITY(x) OPUS_SET_COMPLEXITY_REQUEST, __opus_check_int(x)
+/** Gets the encoder's complexity configuration.
+  * @see OPUS_SET_COMPLEXITY
+  * @param[out] x <tt>opus_int32 *</tt>: Returns a value in the range 0-10,
+  *                                      inclusive.
+  * @hideinitializer */
+#define OPUS_GET_COMPLEXITY(x) OPUS_GET_COMPLEXITY_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the bitrate in the encoder.
+  * Rates from 500 to 512000 bits per second are meaningful, as well as the
+  * special values #OPUS_AUTO and #OPUS_BITRATE_MAX.
+  * The value #OPUS_BITRATE_MAX can be used to cause the codec to use as much
+  * rate as it can, which is useful for controlling the rate by adjusting the
+  * output buffer size.
+  * @see OPUS_GET_BITRATE
+  * @param[in] x <tt>opus_int32</tt>: Bitrate in bits per second. The default
+  *                                   is determined based on the number of
+  *                                   channels and the input sampling rate.
+  * @hideinitializer */
+#define OPUS_SET_BITRATE(x) OPUS_SET_BITRATE_REQUEST, __opus_check_int(x)
+/** Gets the encoder's bitrate configuration.
+  * @see OPUS_SET_BITRATE
+  * @param[out] x <tt>opus_int32 *</tt>: Returns the bitrate in bits per second.
+  *                                      The default is determined based on the
+  *                                      number of channels and the input
+  *                                      sampling rate.
+  * @hideinitializer */
+#define OPUS_GET_BITRATE(x) OPUS_GET_BITRATE_REQUEST, __opus_check_int_ptr(x)
+
+/** Enables or disables variable bitrate (VBR) in the encoder.
+  * The configured bitrate may not be met exactly because frames must
+  * be an integer number of bytes in length.
+  * @warning Only the MDCT mode of Opus can provide hard CBR behavior.
+  * @see OPUS_GET_VBR
+  * @see OPUS_SET_VBR_CONSTRAINT
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Hard CBR. For LPC/hybrid modes at very low bit-rate, this can
+  *               cause noticeable quality degradation.</dd>
+  * <dt>1</dt><dd>VBR (default). The exact type of VBR is controlled by
+  *               #OPUS_SET_VBR_CONSTRAINT.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_VBR(x) OPUS_SET_VBR_REQUEST, __opus_check_int(x)
+/** Determine if variable bitrate (VBR) is enabled in the encoder.
+  * @see OPUS_SET_VBR
+  * @see OPUS_GET_VBR_CONSTRAINT
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>Hard CBR.</dd>
+  * <dt>1</dt><dd>VBR (default). The exact type of VBR may be retrieved via
+  *               #OPUS_GET_VBR_CONSTRAINT.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_VBR(x) OPUS_GET_VBR_REQUEST, __opus_check_int_ptr(x)
+
+/** Enables or disables constrained VBR in the encoder.
+  * This setting is ignored when the encoder is in CBR mode.
+  * @warning Only the MDCT mode of Opus currently heeds the constraint.
+  *  Speech mode ignores it completely, hybrid mode may fail to obey it
+  *  if the LPC layer uses more bitrate than the constraint would have
+  *  permitted.
+  * @see OPUS_GET_VBR_CONSTRAINT
+  * @see OPUS_SET_VBR
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Unconstrained VBR.</dd>
+  * <dt>1</dt><dd>Constrained VBR (default). This creates a maximum of one
+  *               frame of buffering delay assuming a transport with a
+  *               serialization speed of the nominal bitrate.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_VBR_CONSTRAINT(x) OPUS_SET_VBR_CONSTRAINT_REQUEST, __opus_check_int(x)
+/** Determine if constrained VBR is enabled in the encoder.
+  * @see OPUS_SET_VBR_CONSTRAINT
+  * @see OPUS_GET_VBR
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>Unconstrained VBR.</dd>
+  * <dt>1</dt><dd>Constrained VBR (default).</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_VBR_CONSTRAINT(x) OPUS_GET_VBR_CONSTRAINT_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures mono/stereo forcing in the encoder.
+  * This can force the encoder to produce packets encoded as either mono or
+  * stereo, regardless of the format of the input audio. This is useful when
+  * the caller knows that the input signal is currently a mono source embedded
+  * in a stereo stream.
+  * @see OPUS_GET_FORCE_CHANNELS
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd>
+  * <dt>1</dt>         <dd>Forced mono</dd>
+  * <dt>2</dt>         <dd>Forced stereo</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_FORCE_CHANNELS(x) OPUS_SET_FORCE_CHANNELS_REQUEST, __opus_check_int(x)
+/** Gets the encoder's forced channel configuration.
+  * @see OPUS_SET_FORCE_CHANNELS
+  * @param[out] x <tt>opus_int32 *</tt>:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd>
+  * <dt>1</dt>         <dd>Forced mono</dd>
+  * <dt>2</dt>         <dd>Forced stereo</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_FORCE_CHANNELS(x) OPUS_GET_FORCE_CHANNELS_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the maximum bandpass that the encoder will select automatically.
+  * Applications should normally use this instead of #OPUS_SET_BANDWIDTH
+  * (leaving that set to the default, #OPUS_AUTO). This allows the
+  * application to set an upper bound based on the type of input it is
+  * providing, but still gives the encoder the freedom to reduce the bandpass
+  * when the bitrate becomes too low, for better overall quality.
+  * @see OPUS_GET_MAX_BANDWIDTH
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband (default)</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_MAX_BANDWIDTH(x) OPUS_SET_MAX_BANDWIDTH_REQUEST, __opus_check_int(x)
+
+/** Gets the encoder's configured maximum allowed bandpass.
+  * @see OPUS_SET_MAX_BANDWIDTH
+  * @param[out] x <tt>opus_int32 *</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband (default)</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_MAX_BANDWIDTH(x) OPUS_GET_MAX_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
+
+/** Sets the encoder's bandpass to a specific value.
+  * This prevents the encoder from automatically selecting the bandpass based
+  * on the available bitrate. If an application knows the bandpass of the input
+  * audio it is providing, it should normally use #OPUS_SET_MAX_BANDWIDTH
+  * instead, which still gives the encoder the freedom to reduce the bandpass
+  * when the bitrate becomes too low, for better overall quality.
+  * @see OPUS_GET_BANDWIDTH
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>                    <dd>(default)</dd>
+  * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_BANDWIDTH(x) OPUS_SET_BANDWIDTH_REQUEST, __opus_check_int(x)
+
+/** Configures the type of signal being encoded.
+  * This is a hint which helps the encoder's mode selection.
+  * @see OPUS_GET_SIGNAL
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>        <dd>(default)</dd>
+  * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd>
+  * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_SIGNAL(x) OPUS_SET_SIGNAL_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured signal type.
+  * @see OPUS_SET_SIGNAL
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>        <dd>(default)</dd>
+  * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd>
+  * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_SIGNAL(x) OPUS_GET_SIGNAL_REQUEST, __opus_check_int_ptr(x)
+
+
+/** Configures the encoder's intended application.
+  * The initial value is a mandatory argument to the encoder_create function.
+  * @see OPUS_GET_APPLICATION
+  * @param[in] x <tt>opus_int32</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_APPLICATION(x) OPUS_SET_APPLICATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured application.
+  * @see OPUS_SET_APPLICATION
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the sampling rate the encoder or decoder was initialized with.
+  * This simply returns the <code>Fs</code> value passed to opus_encoder_init()
+  * or opus_decoder_init().
+  * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
+  * @hideinitializer
+  */
+#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the total samples of delay added by the entire codec.
+  * This can be queried by the encoder and then the provided number of samples can be
+  * skipped on from the start of the decoder's output to provide time aligned input
+  * and output. From the perspective of a decoding application the real data begins this many
+  * samples late.
+  *
+  * The decoder contribution to this delay is identical for all decoders, but the
+  * encoder portion of the delay may vary from implementation to implementation,
+  * version to version, or even depend on the encoder's initial configuration.
+  * Applications needing delay compensation should call this CTL rather than
+  * hard-coding a value.
+  * @param[out] x <tt>opus_int32 *</tt>:   Number of lookahead samples
+  * @hideinitializer */
+#define OPUS_GET_LOOKAHEAD(x) OPUS_GET_LOOKAHEAD_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of inband forward error correction (FEC).
+  * @note This is only applicable to the LPC layer
+  * @see OPUS_GET_INBAND_FEC
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Disable inband FEC (default).</dd>
+  * <dt>1</dt><dd>Enable inband FEC.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_INBAND_FEC(x) OPUS_SET_INBAND_FEC_REQUEST, __opus_check_int(x)
+/** Gets encoder's configured use of inband forward error correction.
+  * @see OPUS_SET_INBAND_FEC
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>Inband FEC disabled (default).</dd>
+  * <dt>1</dt><dd>Inband FEC enabled.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's expected packet loss percentage.
+  * Higher values with trigger progressively more loss resistant behavior in the encoder
+  * at the expense of quality at a given bitrate in the lossless case, but greater quality
+  * under loss.
+  * @see OPUS_GET_PACKET_LOSS_PERC
+  * @param[in] x <tt>opus_int32</tt>:   Loss percentage in the range 0-100, inclusive (default: 0).
+  * @hideinitializer */
+#define OPUS_SET_PACKET_LOSS_PERC(x) OPUS_SET_PACKET_LOSS_PERC_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured packet loss percentage.
+  * @see OPUS_SET_PACKET_LOSS_PERC
+  * @param[out] x <tt>opus_int32 *</tt>: Returns the configured loss percentage
+  *                                      in the range 0-100, inclusive (default: 0).
+  * @hideinitializer */
+#define OPUS_GET_PACKET_LOSS_PERC(x) OPUS_GET_PACKET_LOSS_PERC_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of discontinuous transmission (DTX).
+  * @note This is only applicable to the LPC layer
+  * @see OPUS_GET_DTX
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>0</dt><dd>Disable DTX (default).</dd>
+  * <dt>1</dt><dd>Enabled DTX.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_DTX(x) OPUS_SET_DTX_REQUEST, __opus_check_int(x)
+/** Gets encoder's configured use of discontinuous transmission.
+  * @see OPUS_SET_DTX
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>0</dt><dd>DTX disabled (default).</dd>
+  * <dt>1</dt><dd>DTX enabled.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x)
+/** Configures the depth of signal being encoded.
+  * This is a hint which helps the encoder identify silence and near-silence.
+  * @see OPUS_GET_LSB_DEPTH
+  * @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24
+  *                                   (default: 24).
+  * @hideinitializer */
+#define OPUS_SET_LSB_DEPTH(x) OPUS_SET_LSB_DEPTH_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured signal depth.
+  * @see OPUS_SET_LSB_DEPTH
+  * @param[out] x <tt>opus_int32 *</tt>: Input precision in bits, between 8 and
+  *                                      24 (default: 24).
+  * @hideinitializer */
+#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
+  * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
+  * @hideinitializer */
+#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of variable duration frames.
+  * When variable duration is enabled, the encoder is free to use a shorter frame
+  * size than the one requested in the opus_encode*() call.
+  * It is then the user's responsibility
+  * to verify how much audio was encoded by checking the ToC byte of the encoded
+  * packet. The part of the audio that was not encoded needs to be resent to the
+  * encoder for the next call. Do not use this option unless you <b>really</b>
+  * know what you are doing.
+  * @see OPUS_GET_EXPERT_VARIABLE_DURATION
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+  * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured use of variable duration frames.
+  * @see OPUS_SET_EXPERT_VARIABLE_DURATION
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+  * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** If set to 1, disables almost all use of prediction, making frames almost
+    completely independent. This reduces quality. (default : 0)
+  * @hideinitializer */
+#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured prediction status.
+  * @hideinitializer */
+#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_genericctls Generic CTLs
+  *
+  * These macros are used with the \c opus_decoder_ctl and
+  * \c opus_encoder_ctl calls to generate a particular
+  * request.
+  *
+  * When called on an \c OpusDecoder they apply to that
+  * particular decoder instance. When called on an
+  * \c OpusEncoder they apply to the corresponding setting
+  * on that encoder instance, if present.
+  *
+  * Some usage examples:
+  *
+  * @code
+  * int ret;
+  * opus_int32 pitch;
+  * ret = opus_decoder_ctl(dec_ctx, OPUS_GET_PITCH(&pitch));
+  * if (ret == OPUS_OK) return ret;
+  *
+  * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE);
+  * opus_decoder_ctl(dec_ctx, OPUS_RESET_STATE);
+  *
+  * opus_int32 enc_bw, dec_bw;
+  * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&enc_bw));
+  * opus_decoder_ctl(dec_ctx, OPUS_GET_BANDWIDTH(&dec_bw));
+  * if (enc_bw != dec_bw) {
+  *   printf("packet bandwidth mismatch!\n");
+  * }
+  * @endcode
+  *
+  * @see opus_encoder, opus_decoder_ctl, opus_encoder_ctl, opus_decoderctls, opus_encoderctls
+  * @{
+  */
+
+/** Resets the codec state to be equivalent to a freshly initialized state.
+  * This should be called when switching streams in order to prevent
+  * the back to back decoding from giving different results from
+  * one at a time decoding.
+  * @hideinitializer */
+#define OPUS_RESET_STATE 4028
+
+/** Gets the final state of the codec's entropy coder.
+  * This is used for testing purposes,
+  * The encoder and decoder state should be identical after coding a payload
+  * (assuming no data corruption or software bugs)
+  *
+  * @param[out] x <tt>opus_uint32 *</tt>: Entropy coder state
+  *
+  * @hideinitializer */
+#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x)
+
+/** Gets the pitch of the last decoded frame, if available.
+  * This can be used for any post-processing algorithm requiring the use of pitch,
+  * e.g. time stretching/shortening. If the last frame was not voiced, or if the
+  * pitch was not coded in the frame, then zero is returned.
+  *
+  * This CTL is only implemented for decoder instances.
+  *
+  * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
+  *
+  * @hideinitializer */
+#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the encoder's configured bandpass or the decoder's last bandpass.
+  * @see OPUS_SET_BANDWIDTH
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
+  * <dt>#OPUS_AUTO</dt>                    <dd>(default)</dd>
+  * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt>    <dd>4 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt>    <dd>6 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt>      <dd>8 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd>
+  * <dt>#OPUS_BANDWIDTH_FULLBAND</dt>     <dd>20 kHz passband</dd>
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_decoderctls Decoder related CTLs
+  * @see opus_genericctls, opus_encoderctls, opus_decoder
+  * @{
+  */
+
+/** Configures decoder gain adjustment.
+  * Scales the decoded output by a factor specified in Q8 dB units.
+  * This has a maximum range of -32768 to 32767 inclusive, and returns
+  * OPUS_BAD_ARG otherwise. The default is zero indicating no adjustment.
+  * This setting survives decoder reset.
+  *
+  * gain = pow(10, x/(20.0*256))
+  *
+  * @param[in] x <tt>opus_int32</tt>:   Amount to scale PCM signal by in Q8 dB units.
+  * @hideinitializer */
+#define OPUS_SET_GAIN(x) OPUS_SET_GAIN_REQUEST, __opus_check_int(x)
+/** Gets the decoder's configured gain adjustment. @see OPUS_SET_GAIN
+  *
+  * @param[out] x <tt>opus_int32 *</tt>: Amount to scale PCM signal by in Q8 dB units.
+  * @hideinitializer */
+#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x)
+
+/**@}*/
+
+/** @defgroup opus_libinfo Opus library information functions
+  * @{
+  */
+
+/** Converts an opus error code into a human readable string.
+  *
+  * @param[in] error <tt>int</tt>: Error number
+  * @returns Error string
+  */
+OPUS_EXPORT const char *opus_strerror(int error);
+
+/** Gets the libopus version string.
+  *
+  * @returns Version string
+  */
+OPUS_EXPORT const char *opus_get_version_string(void);
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_DEFINES_H */
diff --git a/drivers/opus/opus_demo.c b/drivers/opus/opus_demo.c
new file mode 100644
index 0000000000..7fcf65fd8b
--- /dev/null
+++ b/drivers/opus/opus_demo.c
@@ -0,0 +1,885 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "opus.h"
+#include "debug.h"
+#include "opus_types.h"
+#include "opus_private.h"
+#include "opus_multistream.h"
+
+#define MAX_PACKET 1500
+
+void print_usage( char* argv[] )
+{
+    fprintf(stderr, "Usage: %s [-e] <application> <sampling rate (Hz)> <channels (1/2)> "
+        "<bits per second>  [options] <input> <output>\n", argv[0]);
+    fprintf(stderr, "       %s -d <sampling rate (Hz)> <channels (1/2)> "
+        "[options] <input> <output>\n\n", argv[0]);
+    fprintf(stderr, "mode: voip | audio | restricted-lowdelay\n" );
+    fprintf(stderr, "options:\n" );
+    fprintf(stderr, "-e                   : only runs the encoder (output the bit-stream)\n" );
+    fprintf(stderr, "-d                   : only runs the decoder (reads the bit-stream as input)\n" );
+    fprintf(stderr, "-cbr                 : enable constant bitrate; default: variable bitrate\n" );
+    fprintf(stderr, "-cvbr                : enable constrained variable bitrate; default: unconstrained\n" );
+    fprintf(stderr, "-variable-duration   : enable frames of variable duration (experts only); default: disabled\n" );
+    fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" );
+    fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" );
+    fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" );
+    fprintf(stderr, "-complexity <comp>   : complexity, 0 (lowest) ... 10 (highest); default: 10\n" );
+    fprintf(stderr, "-inbandfec           : enable SILK inband FEC\n" );
+    fprintf(stderr, "-forcemono           : force mono encoding, even for stereo input\n" );
+    fprintf(stderr, "-dtx                 : enable SILK DTX\n" );
+    fprintf(stderr, "-loss <perc>         : simulate packet loss, in percent (0-100); default: 0\n" );
+}
+
+static void int_to_char(opus_uint32 i, unsigned char ch[4])
+{
+    ch[0] = i>>24;
+    ch[1] = (i>>16)&0xFF;
+    ch[2] = (i>>8)&0xFF;
+    ch[3] = i&0xFF;
+}
+
+static opus_uint32 char_to_int(unsigned char ch[4])
+{
+    return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16)
+         | ((opus_uint32)ch[2]<< 8) |  (opus_uint32)ch[3];
+}
+
+static void check_encoder_option(int decode_only, const char *opt)
+{
+   if (decode_only)
+   {
+      fprintf(stderr, "option %s is only for encoding\n", opt);
+      exit(EXIT_FAILURE);
+   }
+}
+
+static const int silk8_test[][4] = {
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*3, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*2, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960,   1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480,   1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*3, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*2, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960,   2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480,   2}
+};
+
+static const int silk12_test[][4] = {
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*3, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*2, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960,   1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 480,   1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*3, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*2, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960,   2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 480,   2}
+};
+
+static const int silk16_test[][4] = {
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*3, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*2, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960,   1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480,   1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*3, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*2, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960,   2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480,   2}
+};
+
+static const int hybrid24_test[][4] = {
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 2}
+};
+
+static const int hybrid48_test[][4] = {
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 1},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 2},
+      {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 2}
+};
+
+static const int celt_test[][4] = {
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      960, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      960, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    960, 1},
+
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      480, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      480, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    480, 1},
+
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      240, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 240, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      240, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    240, 1},
+
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      120, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 120, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      120, 1},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    120, 1},
+
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      960, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      960, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    960, 2},
+
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      480, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      480, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    480, 2},
+
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      240, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 240, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      240, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    240, 2},
+
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      120, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 120, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND,      120, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND,    120, 2},
+
+};
+
+static const int celt_hq_test[][4] = {
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      960, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      480, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      240, 2},
+      {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND,      120, 2},
+};
+
+#if 0 /* This is a hack that replaces the normal encoder/decoder with the multistream version */
+#define OpusEncoder OpusMSEncoder
+#define OpusDecoder OpusMSDecoder
+#define opus_encode opus_multistream_encode
+#define opus_decode opus_multistream_decode
+#define opus_encoder_ctl opus_multistream_encoder_ctl
+#define opus_decoder_ctl opus_multistream_decoder_ctl
+#define opus_encoder_create ms_opus_encoder_create
+#define opus_decoder_create ms_opus_decoder_create
+#define opus_encoder_destroy opus_multistream_encoder_destroy
+#define opus_decoder_destroy opus_multistream_decoder_destroy
+
+static OpusEncoder *ms_opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
+{
+   int streams, coupled_streams;
+   unsigned char mapping[256];
+   return (OpusEncoder *)opus_multistream_surround_encoder_create(Fs, channels, 1, &streams, &coupled_streams, mapping, application, error);
+}
+static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *error)
+{
+   int streams;
+   int coupled_streams;
+   unsigned char mapping[256]={0,1};
+   streams = 1;
+   coupled_streams = channels==2;
+   return (OpusDecoder *)opus_multistream_decoder_create(Fs, channels, streams, coupled_streams, mapping, error);
+}
+#endif
+
+int main(int argc, char *argv[])
+{
+    int err;
+    char *inFile, *outFile;
+    FILE *fin, *fout;
+    OpusEncoder *enc=NULL;
+    OpusDecoder *dec=NULL;
+    int args;
+    int len[2];
+    int frame_size, channels;
+    opus_int32 bitrate_bps=0;
+    unsigned char *data[2];
+    unsigned char *fbytes;
+    opus_int32 sampling_rate;
+    int use_vbr;
+    int max_payload_bytes;
+    int complexity;
+    int use_inbandfec;
+    int use_dtx;
+    int forcechannels;
+    int cvbr = 0;
+    int packet_loss_perc;
+    opus_int32 count=0, count_act=0;
+    int k;
+    opus_int32 skip=0;
+    int stop=0;
+    short *in, *out;
+    int application=OPUS_APPLICATION_AUDIO;
+    double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;
+    double tot_samples=0;
+    opus_uint64 tot_in, tot_out;
+    int bandwidth=-1;
+    const char *bandwidth_string;
+    int lost = 0, lost_prev = 1;
+    int toggle = 0;
+    opus_uint32 enc_final_range[2];
+    opus_uint32 dec_final_range;
+    int encode_only=0, decode_only=0;
+    int max_frame_size = 960*6;
+    int curr_read=0;
+    int sweep_bps = 0;
+    int random_framesize=0, newsize=0, delayed_celt=0;
+    int sweep_max=0, sweep_min=0;
+    int random_fec=0;
+    const int (*mode_list)[4]=NULL;
+    int nb_modes_in_list=0;
+    int curr_mode=0;
+    int curr_mode_count=0;
+    int mode_switch_time = 48000;
+    int nb_encoded=0;
+    int remaining=0;
+    int variable_duration=OPUS_FRAMESIZE_ARG;
+    int delayed_decision=0;
+
+    if (argc < 5 )
+    {
+       print_usage( argv );
+       return EXIT_FAILURE;
+    }
+
+    tot_in=tot_out=0;
+    fprintf(stderr, "%s\n", opus_get_version_string());
+
+    args = 1;
+    if (strcmp(argv[args], "-e")==0)
+    {
+        encode_only = 1;
+        args++;
+    } else if (strcmp(argv[args], "-d")==0)
+    {
+        decode_only = 1;
+        args++;
+    }
+    if (!decode_only && argc < 7 )
+    {
+       print_usage( argv );
+       return EXIT_FAILURE;
+    }
+
+    if (!decode_only)
+    {
+       if (strcmp(argv[args], "voip")==0)
+          application = OPUS_APPLICATION_VOIP;
+       else if (strcmp(argv[args], "restricted-lowdelay")==0)
+          application = OPUS_APPLICATION_RESTRICTED_LOWDELAY;
+       else if (strcmp(argv[args], "audio")!=0) {
+          fprintf(stderr, "unknown application: %s\n", argv[args]);
+          print_usage(argv);
+          return EXIT_FAILURE;
+       }
+       args++;
+    }
+    sampling_rate = (opus_int32)atol(argv[args]);
+    args++;
+
+    if (sampling_rate != 8000 && sampling_rate != 12000
+     && sampling_rate != 16000 && sampling_rate != 24000
+     && sampling_rate != 48000)
+    {
+        fprintf(stderr, "Supported sampling rates are 8000, 12000, "
+                "16000, 24000 and 48000.\n");
+        return EXIT_FAILURE;
+    }
+    frame_size = sampling_rate/50;
+
+    channels = atoi(argv[args]);
+    args++;
+
+    if (channels < 1 || channels > 2)
+    {
+        fprintf(stderr, "Opus_demo supports only 1 or 2 channels.\n");
+        return EXIT_FAILURE;
+    }
+
+    if (!decode_only)
+    {
+       bitrate_bps = (opus_int32)atol(argv[args]);
+       args++;
+    }
+
+    /* defaults: */
+    use_vbr = 1;
+    bandwidth = OPUS_AUTO;
+    max_payload_bytes = MAX_PACKET;
+    complexity = 10;
+    use_inbandfec = 0;
+    forcechannels = OPUS_AUTO;
+    use_dtx = 0;
+    packet_loss_perc = 0;
+    max_frame_size = 2*48000;
+    curr_read=0;
+
+    while( args < argc - 2 ) {
+        /* process command line options */
+        if( strcmp( argv[ args ], "-cbr" ) == 0 ) {
+            check_encoder_option(decode_only, "-cbr");
+            use_vbr = 0;
+            args++;
+        } else if( strcmp( argv[ args ], "-bandwidth" ) == 0 ) {
+            check_encoder_option(decode_only, "-bandwidth");
+            if (strcmp(argv[ args + 1 ], "NB")==0)
+                bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+            else if (strcmp(argv[ args + 1 ], "MB")==0)
+                bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+            else if (strcmp(argv[ args + 1 ], "WB")==0)
+                bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+            else if (strcmp(argv[ args + 1 ], "SWB")==0)
+                bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+            else if (strcmp(argv[ args + 1 ], "FB")==0)
+                bandwidth = OPUS_BANDWIDTH_FULLBAND;
+            else {
+                fprintf(stderr, "Unknown bandwidth %s. "
+                                "Supported are NB, MB, WB, SWB, FB.\n",
+                                argv[ args + 1 ]);
+                return EXIT_FAILURE;
+            }
+            args += 2;
+        } else if( strcmp( argv[ args ], "-framesize" ) == 0 ) {
+            check_encoder_option(decode_only, "-framesize");
+            if (strcmp(argv[ args + 1 ], "2.5")==0)
+                frame_size = sampling_rate/400;
+            else if (strcmp(argv[ args + 1 ], "5")==0)
+                frame_size = sampling_rate/200;
+            else if (strcmp(argv[ args + 1 ], "10")==0)
+                frame_size = sampling_rate/100;
+            else if (strcmp(argv[ args + 1 ], "20")==0)
+                frame_size = sampling_rate/50;
+            else if (strcmp(argv[ args + 1 ], "40")==0)
+                frame_size = sampling_rate/25;
+            else if (strcmp(argv[ args + 1 ], "60")==0)
+                frame_size = 3*sampling_rate/50;
+            else {
+                fprintf(stderr, "Unsupported frame size: %s ms. "
+                                "Supported are 2.5, 5, 10, 20, 40, 60.\n",
+                                argv[ args + 1 ]);
+                return EXIT_FAILURE;
+            }
+            args += 2;
+        } else if( strcmp( argv[ args ], "-max_payload" ) == 0 ) {
+            check_encoder_option(decode_only, "-max_payload");
+            max_payload_bytes = atoi( argv[ args + 1 ] );
+            args += 2;
+        } else if( strcmp( argv[ args ], "-complexity" ) == 0 ) {
+            check_encoder_option(decode_only, "-complexity");
+            complexity = atoi( argv[ args + 1 ] );
+            args += 2;
+        } else if( strcmp( argv[ args ], "-inbandfec" ) == 0 ) {
+            use_inbandfec = 1;
+            args++;
+        } else if( strcmp( argv[ args ], "-forcemono" ) == 0 ) {
+            check_encoder_option(decode_only, "-forcemono");
+            forcechannels = 1;
+            args++;
+        } else if( strcmp( argv[ args ], "-cvbr" ) == 0 ) {
+            check_encoder_option(decode_only, "-cvbr");
+            cvbr = 1;
+            args++;
+        } else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) {
+            check_encoder_option(decode_only, "-variable-duration");
+            variable_duration = OPUS_FRAMESIZE_VARIABLE;
+            args++;
+        } else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) {
+            check_encoder_option(decode_only, "-delayed-decision");
+            delayed_decision = 1;
+            args++;
+        } else if( strcmp( argv[ args ], "-dtx") == 0 ) {
+            check_encoder_option(decode_only, "-dtx");
+            use_dtx = 1;
+            args++;
+        } else if( strcmp( argv[ args ], "-loss" ) == 0 ) {
+            packet_loss_perc = atoi( argv[ args + 1 ] );
+            args += 2;
+        } else if( strcmp( argv[ args ], "-sweep" ) == 0 ) {
+            check_encoder_option(decode_only, "-sweep");
+            sweep_bps = atoi( argv[ args + 1 ] );
+            args += 2;
+        } else if( strcmp( argv[ args ], "-random_framesize" ) == 0 ) {
+            check_encoder_option(decode_only, "-random_framesize");
+            random_framesize = 1;
+            args++;
+        } else if( strcmp( argv[ args ], "-sweep_max" ) == 0 ) {
+            check_encoder_option(decode_only, "-sweep_max");
+            sweep_max = atoi( argv[ args + 1 ] );
+            args += 2;
+        } else if( strcmp( argv[ args ], "-random_fec" ) == 0 ) {
+            check_encoder_option(decode_only, "-random_fec");
+            random_fec = 1;
+            args++;
+        } else if( strcmp( argv[ args ], "-silk8k_test" ) == 0 ) {
+            check_encoder_option(decode_only, "-silk8k_test");
+            mode_list = silk8_test;
+            nb_modes_in_list = 8;
+            args++;
+        } else if( strcmp( argv[ args ], "-silk12k_test" ) == 0 ) {
+            check_encoder_option(decode_only, "-silk12k_test");
+            mode_list = silk12_test;
+            nb_modes_in_list = 8;
+            args++;
+        } else if( strcmp( argv[ args ], "-silk16k_test" ) == 0 ) {
+            check_encoder_option(decode_only, "-silk16k_test");
+            mode_list = silk16_test;
+            nb_modes_in_list = 8;
+            args++;
+        } else if( strcmp( argv[ args ], "-hybrid24k_test" ) == 0 ) {
+            check_encoder_option(decode_only, "-hybrid24k_test");
+            mode_list = hybrid24_test;
+            nb_modes_in_list = 4;
+            args++;
+        } else if( strcmp( argv[ args ], "-hybrid48k_test" ) == 0 ) {
+            check_encoder_option(decode_only, "-hybrid48k_test");
+            mode_list = hybrid48_test;
+            nb_modes_in_list = 4;
+            args++;
+        } else if( strcmp( argv[ args ], "-celt_test" ) == 0 ) {
+            check_encoder_option(decode_only, "-celt_test");
+            mode_list = celt_test;
+            nb_modes_in_list = 32;
+            args++;
+        } else if( strcmp( argv[ args ], "-celt_hq_test" ) == 0 ) {
+            check_encoder_option(decode_only, "-celt_hq_test");
+            mode_list = celt_hq_test;
+            nb_modes_in_list = 4;
+            args++;
+        } else {
+            printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
+            print_usage( argv );
+            return EXIT_FAILURE;
+        }
+    }
+
+    if (sweep_max)
+       sweep_min = bitrate_bps;
+
+    if (max_payload_bytes < 0 || max_payload_bytes > MAX_PACKET)
+    {
+        fprintf (stderr, "max_payload_bytes must be between 0 and %d\n",
+                          MAX_PACKET);
+        return EXIT_FAILURE;
+    }
+
+    inFile = argv[argc-2];
+    fin = fopen(inFile, "rb");
+    if (!fin)
+    {
+        fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
+        return EXIT_FAILURE;
+    }
+    if (mode_list)
+    {
+       int size;
+       fseek(fin, 0, SEEK_END);
+       size = ftell(fin);
+       fprintf(stderr, "File size is %d bytes\n", size);
+       fseek(fin, 0, SEEK_SET);
+       mode_switch_time = size/sizeof(short)/channels/nb_modes_in_list;
+       fprintf(stderr, "Switching mode every %d samples\n", mode_switch_time);
+    }
+
+    outFile = argv[argc-1];
+    fout = fopen(outFile, "wb+");
+    if (!fout)
+    {
+        fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
+        fclose(fin);
+        return EXIT_FAILURE;
+    }
+
+    if (!decode_only)
+    {
+       enc = opus_encoder_create(sampling_rate, channels, application, &err);
+       if (err != OPUS_OK)
+       {
+          fprintf(stderr, "Cannot create encoder: %s\n", opus_strerror(err));
+          fclose(fin);
+          fclose(fout);
+          return EXIT_FAILURE;
+       }
+       opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
+       opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(bandwidth));
+       opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr));
+       opus_encoder_ctl(enc, OPUS_SET_VBR_CONSTRAINT(cvbr));
+       opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
+       opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(use_inbandfec));
+       opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(forcechannels));
+       opus_encoder_ctl(enc, OPUS_SET_DTX(use_dtx));
+       opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc));
+
+       opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&skip));
+       opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(16));
+       opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
+    }
+    if (!encode_only)
+    {
+       dec = opus_decoder_create(sampling_rate, channels, &err);
+       if (err != OPUS_OK)
+       {
+          fprintf(stderr, "Cannot create decoder: %s\n", opus_strerror(err));
+          fclose(fin);
+          fclose(fout);
+          return EXIT_FAILURE;
+       }
+    }
+
+
+    switch(bandwidth)
+    {
+    case OPUS_BANDWIDTH_NARROWBAND:
+         bandwidth_string = "narrowband";
+         break;
+    case OPUS_BANDWIDTH_MEDIUMBAND:
+         bandwidth_string = "mediumband";
+         break;
+    case OPUS_BANDWIDTH_WIDEBAND:
+         bandwidth_string = "wideband";
+         break;
+    case OPUS_BANDWIDTH_SUPERWIDEBAND:
+         bandwidth_string = "superwideband";
+         break;
+    case OPUS_BANDWIDTH_FULLBAND:
+         bandwidth_string = "fullband";
+         break;
+    case OPUS_AUTO:
+         bandwidth_string = "auto";
+         break;
+    default:
+         bandwidth_string = "unknown";
+         break;
+    }
+
+    if (decode_only)
+       fprintf(stderr, "Decoding with %ld Hz output (%d channels)\n",
+                       (long)sampling_rate, channels);
+    else
+       fprintf(stderr, "Encoding %ld Hz input at %.3f kb/s "
+                       "in %s mode with %d-sample frames.\n",
+                       (long)sampling_rate, bitrate_bps*0.001,
+                       bandwidth_string, frame_size);
+
+    in = (short*)malloc(max_frame_size*channels*sizeof(short));
+    out = (short*)malloc(max_frame_size*channels*sizeof(short));
+    fbytes = (unsigned char*)malloc(max_frame_size*channels*sizeof(short));
+    data[0] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));
+    if ( use_inbandfec ) {
+        data[1] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));
+    }
+    if(delayed_decision)
+    {
+       if (variable_duration!=OPUS_FRAMESIZE_VARIABLE)
+       {
+          if (frame_size==sampling_rate/400)
+             variable_duration = OPUS_FRAMESIZE_2_5_MS;
+          else if (frame_size==sampling_rate/200)
+             variable_duration = OPUS_FRAMESIZE_5_MS;
+          else if (frame_size==sampling_rate/100)
+             variable_duration = OPUS_FRAMESIZE_10_MS;
+          else if (frame_size==sampling_rate/50)
+             variable_duration = OPUS_FRAMESIZE_20_MS;
+          else if (frame_size==sampling_rate/25)
+             variable_duration = OPUS_FRAMESIZE_40_MS;
+          else
+             variable_duration = OPUS_FRAMESIZE_60_MS;
+          opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
+       }
+       frame_size = 2*48000;
+    }
+    while (!stop)
+    {
+        if (delayed_celt)
+        {
+            frame_size = newsize;
+            delayed_celt = 0;
+        } else if (random_framesize && rand()%20==0)
+        {
+            newsize = rand()%6;
+            switch(newsize)
+            {
+            case 0: newsize=sampling_rate/400; break;
+            case 1: newsize=sampling_rate/200; break;
+            case 2: newsize=sampling_rate/100; break;
+            case 3: newsize=sampling_rate/50; break;
+            case 4: newsize=sampling_rate/25; break;
+            case 5: newsize=3*sampling_rate/50; break;
+            }
+            while (newsize < sampling_rate/25 && bitrate_bps-fabs(sweep_bps) <= 3*12*sampling_rate/newsize)
+               newsize*=2;
+            if (newsize < sampling_rate/100 && frame_size >= sampling_rate/100)
+            {
+                opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+                delayed_celt=1;
+            } else {
+                frame_size = newsize;
+            }
+        }
+        if (random_fec && rand()%30==0)
+        {
+           opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(rand()%4==0));
+        }
+        if (decode_only)
+        {
+            unsigned char ch[4];
+            err = fread(ch, 1, 4, fin);
+            if (feof(fin))
+                break;
+            len[toggle] = char_to_int(ch);
+            if (len[toggle]>max_payload_bytes || len[toggle]<0)
+            {
+                fprintf(stderr, "Invalid payload length: %d\n",len[toggle]);
+                break;
+            }
+            err = fread(ch, 1, 4, fin);
+            enc_final_range[toggle] = char_to_int(ch);
+            err = fread(data[toggle], 1, len[toggle], fin);
+            if (err<len[toggle])
+            {
+                fprintf(stderr, "Ran out of input, "
+                                "expecting %d bytes got %d\n",
+                                len[toggle],err);
+                break;
+            }
+        } else {
+            int i;
+            if (mode_list!=NULL)
+            {
+                opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(mode_list[curr_mode][1]));
+                opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(mode_list[curr_mode][0]));
+                opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
+                frame_size = mode_list[curr_mode][2];
+            }
+            err = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
+            curr_read = err;
+            tot_in += curr_read;
+            for(i=0;i<curr_read*channels;i++)
+            {
+                opus_int32 s;
+                s=fbytes[2*i+1]<<8|fbytes[2*i];
+                s=((s&0xFFFF)^0x8000)-0x8000;
+                in[i+remaining*channels]=s;
+            }
+            if (curr_read+remaining < frame_size)
+            {
+                for (i=(curr_read+remaining)*channels;i<frame_size*channels;i++)
+                   in[i] = 0;
+                if (encode_only || decode_only)
+                   stop = 1;
+            }
+            len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
+            nb_encoded = opus_packet_get_samples_per_frame(data[toggle], sampling_rate)*opus_packet_get_nb_frames(data[toggle], len[toggle]);
+            remaining = frame_size-nb_encoded;
+            for(i=0;i<remaining*channels;i++)
+               in[i] = in[nb_encoded*channels+i];
+            if (sweep_bps!=0)
+            {
+               bitrate_bps += sweep_bps;
+               if (sweep_max)
+               {
+                  if (bitrate_bps > sweep_max)
+                     sweep_bps = -sweep_bps;
+                  else if (bitrate_bps < sweep_min)
+                     sweep_bps = -sweep_bps;
+               }
+               /* safety */
+               if (bitrate_bps<1000)
+                  bitrate_bps = 1000;
+               opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
+            }
+            opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle]));
+            if (len[toggle] < 0)
+            {
+                fprintf (stderr, "opus_encode() returned %d\n", len[toggle]);
+                fclose(fin);
+                fclose(fout);
+                return EXIT_FAILURE;
+            }
+            curr_mode_count += frame_size;
+            if (curr_mode_count > mode_switch_time && curr_mode < nb_modes_in_list-1)
+            {
+               curr_mode++;
+               curr_mode_count = 0;
+            }
+        }
+
+#if 0 /* This is for testing the padding code, do not enable by default */
+        if (len[toggle]<1275)
+        {
+           int new_len = len[toggle]+rand()%(max_payload_bytes-len[toggle]);
+           if ((err = opus_packet_pad(data[toggle], len[toggle], new_len)) != OPUS_OK)
+           {
+              fprintf(stderr, "padding failed: %s\n", opus_strerror(err));
+              return EXIT_FAILURE;
+           }
+           len[toggle] = new_len;
+        }
+#endif
+        if (encode_only)
+        {
+            unsigned char int_field[4];
+            int_to_char(len[toggle], int_field);
+            if (fwrite(int_field, 1, 4, fout) != 4) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            int_to_char(enc_final_range[toggle], int_field);
+            if (fwrite(int_field, 1, 4, fout) != 4) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            if (fwrite(data[toggle], 1, len[toggle], fout) != (unsigned)len[toggle]) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            tot_samples += nb_encoded;
+        } else {
+            int output_samples;
+            lost = len[toggle]==0 || (packet_loss_perc>0 && rand()%100 < packet_loss_perc);
+            if (lost)
+               opus_decoder_ctl(dec, OPUS_GET_LAST_PACKET_DURATION(&output_samples));
+            else
+               output_samples = max_frame_size;
+            if( count >= use_inbandfec ) {
+                /* delay by one packet when using in-band FEC */
+                if( use_inbandfec  ) {
+                    if( lost_prev ) {
+                        /* attempt to decode with in-band FEC from next packet */
+                        opus_decoder_ctl(dec, OPUS_GET_LAST_PACKET_DURATION(&output_samples));
+                        output_samples = opus_decode(dec, lost ? NULL : data[toggle], len[toggle], out, output_samples, 1);
+                    } else {
+                        /* regular decode */
+                        output_samples = max_frame_size;
+                        output_samples = opus_decode(dec, data[1-toggle], len[1-toggle], out, output_samples, 0);
+                    }
+                } else {
+                    output_samples = opus_decode(dec, lost ? NULL : data[toggle], len[toggle], out, output_samples, 0);
+                }
+                if (output_samples>0)
+                {
+                    if (!decode_only && tot_out + output_samples > tot_in)
+                    {
+                       stop=1;
+                       output_samples  = tot_in-tot_out;
+                    }
+                    if (output_samples>skip) {
+                       int i;
+                       for(i=0;i<(output_samples-skip)*channels;i++)
+                       {
+                          short s;
+                          s=out[i+(skip*channels)];
+                          fbytes[2*i]=s&0xFF;
+                          fbytes[2*i+1]=(s>>8)&0xFF;
+                       }
+                       if (fwrite(fbytes, sizeof(short)*channels, output_samples-skip, fout) != (unsigned)(output_samples-skip)){
+                          fprintf(stderr, "Error writing.\n");
+                          return EXIT_FAILURE;
+                       }
+                       tot_out += output_samples-skip;
+                    }
+                    if (output_samples<skip) skip -= output_samples;
+                    else skip = 0;
+                } else {
+                   fprintf(stderr, "error decoding frame: %s\n",
+                                   opus_strerror(output_samples));
+                }
+                tot_samples += output_samples;
+            }
+        }
+
+        if (!encode_only)
+           opus_decoder_ctl(dec, OPUS_GET_FINAL_RANGE(&dec_final_range));
+        /* compare final range encoder rng values of encoder and decoder */
+        if( enc_final_range[toggle^use_inbandfec]!=0  && !encode_only
+         && !lost && !lost_prev
+         && dec_final_range != enc_final_range[toggle^use_inbandfec] ) {
+            fprintf (stderr, "Error: Range coder state mismatch "
+                             "between encoder and decoder "
+                             "in frame %ld: 0x%8lx vs 0x%8lx\n",
+                         (long)count,
+                         (unsigned long)enc_final_range[toggle^use_inbandfec],
+                         (unsigned long)dec_final_range);
+            fclose(fin);
+            fclose(fout);
+            return EXIT_FAILURE;
+        }
+
+        lost_prev = lost;
+
+        /* count bits */
+        bits += len[toggle]*8;
+        bits_max = ( len[toggle]*8 > bits_max ) ? len[toggle]*8 : bits_max;
+        if( count >= use_inbandfec ) {
+            nrg = 0.0;
+            if (!decode_only)
+            {
+                for ( k = 0; k < frame_size * channels; k++ ) {
+                    nrg += in[ k ] * (double)in[ k ];
+                }
+            }
+            if ( ( nrg / ( frame_size * channels ) ) > 1e5 ) {
+                bits_act += len[toggle]*8;
+                count_act++;
+            }
+            /* Variance */
+            bits2 += len[toggle]*len[toggle]*64;
+        }
+        count++;
+        toggle = (toggle + use_inbandfec) & 1;
+    }
+    fprintf (stderr, "average bitrate:             %7.3f kb/s\n",
+                     1e-3*bits*sampling_rate/tot_samples);
+    fprintf (stderr, "maximum bitrate:             %7.3f kb/s\n",
+                     1e-3*bits_max*sampling_rate/frame_size);
+    if (!decode_only)
+       fprintf (stderr, "active bitrate:              %7.3f kb/s\n",
+               1e-3*bits_act*sampling_rate/(frame_size*(double)count_act));
+    fprintf (stderr, "bitrate standard deviation:  %7.3f kb/s\n",
+            1e-3*sqrt(bits2/count - bits*bits/(count*(double)count))*sampling_rate/frame_size);
+    /* Close any files to which intermediate results were stored */
+    SILK_DEBUG_STORE_CLOSE_FILES
+    silk_TimerSave("opus_timing.txt");
+    opus_encoder_destroy(enc);
+    opus_decoder_destroy(dec);
+    free(data[0]);
+    if (use_inbandfec)
+        free(data[1]);
+    fclose(fin);
+    fclose(fout);
+    free(in);
+    free(out);
+    free(fbytes);
+    return EXIT_SUCCESS;
+}
diff --git a/drivers/opus/opus_encoder.c b/drivers/opus/opus_encoder.c
new file mode 100644
index 0000000000..f739daa258
--- /dev/null
+++ b/drivers/opus/opus_encoder.c
@@ -0,0 +1,2488 @@
+/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
+   Written by Jean-Marc Valin and Koen Vos */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdarg.h>
+#include "celt.h"
+#include "entenc.h"
+#include "opus_modes.h"
+#include "API.h"
+#include "stack_alloc.h"
+#include "float_cast.h"
+#include "opus.h"
+#include "arch.h"
+#include "opus_private.h"
+#include "os_support.h"
+#include "cpu_support.h"
+#include "analysis.h"
+#include "mathops.h"
+#include "tuning_parameters.h"
+#ifdef OPUS_FIXED_POINT
+#include "fixed/structs_FIX.h"
+#else
+#include "float/structs_FLP.h"
+#endif
+
+#define MAX_ENCODER_BUFFER 480
+
+typedef struct {
+   opus_val32 XX, XY, YY;
+   opus_val16 smoothed_width;
+   opus_val16 max_follower;
+} StereoWidthState;
+
+struct OpusEncoder {
+    int          celt_enc_offset;
+    int          silk_enc_offset;
+    silk_EncControlStruct silk_mode;
+    int          application;
+    int          channels;
+    int          delay_compensation;
+    int          force_channels;
+    int          signal_type;
+    int          user_bandwidth;
+    int          max_bandwidth;
+    int          user_forced_mode;
+    int          voice_ratio;
+    opus_int32   Fs;
+    int          use_vbr;
+    int          vbr_constraint;
+    int          variable_duration;
+    opus_int32   bitrate_bps;
+    opus_int32   user_bitrate_bps;
+    int          lsb_depth;
+    int          encoder_buffer;
+    int          lfe;
+
+#define OPUS_ENCODER_RESET_START stream_channels
+    int          stream_channels;
+    opus_int16   hybrid_stereo_width_Q14;
+    opus_int32   variable_HP_smth2_Q15;
+    opus_val16   prev_HB_gain;
+    opus_val32   hp_mem[4];
+    int          mode;
+    int          prev_mode;
+    int          prev_channels;
+    int          prev_framesize;
+    int          bandwidth;
+    int          silk_bw_switch;
+    /* Sampling rate (at the API level) */
+    int          first;
+    opus_val16 * energy_masking;
+    StereoWidthState width_mem;
+    opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
+#ifndef DISABLE_FLOAT_API
+    TonalityAnalysisState analysis;
+    int          detected_bandwidth;
+    int          analysis_offset;
+#endif
+    opus_uint32  rangeFinal;
+    int          arch;
+};
+
+/* Transition tables for the voice and music. First column is the
+   middle (memoriless) threshold. The second column is the hysteresis
+   (difference with the middle) */
+static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
+        11000, 1000, /* NB<->MB */
+        14000, 1000, /* MB<->WB */
+        17000, 1000, /* WB<->SWB */
+        21000, 2000, /* SWB<->FB */
+};
+static const opus_int32 mono_music_bandwidth_thresholds[8] = {
+        12000, 1000, /* NB<->MB */
+        15000, 1000, /* MB<->WB */
+        18000, 2000, /* WB<->SWB */
+        22000, 2000, /* SWB<->FB */
+};
+static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
+        11000, 1000, /* NB<->MB */
+        14000, 1000, /* MB<->WB */
+        21000, 2000, /* WB<->SWB */
+        28000, 2000, /* SWB<->FB */
+};
+static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
+        12000, 1000, /* NB<->MB */
+        18000, 2000, /* MB<->WB */
+        21000, 2000, /* WB<->SWB */
+        30000, 2000, /* SWB<->FB */
+};
+/* Threshold bit-rates for switching between mono and stereo */
+static const opus_int32 stereo_voice_threshold = 30000;
+static const opus_int32 stereo_music_threshold = 30000;
+
+/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */
+static const opus_int32 mode_thresholds[2][2] = {
+      /* voice */ /* music */
+      {  64000,      16000}, /* mono */
+      {  36000,      16000}, /* stereo */
+};
+
+int opus_encoder_get_size(int channels)
+{
+    int silkEncSizeBytes, celtEncSizeBytes;
+    int ret;
+    if (channels<1 || channels > 2)
+        return 0;
+    ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
+    if (ret)
+        return 0;
+    silkEncSizeBytes = align(silkEncSizeBytes);
+    celtEncSizeBytes = celt_encoder_get_size(channels);
+    return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes;
+}
+
+int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application)
+{
+    void *silk_enc;
+    CELTEncoder *celt_enc;
+    int err;
+    int ret, silkEncSizeBytes;
+
+   if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
+        (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
+        && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
+        return OPUS_BAD_ARG;
+
+    OPUS_CLEAR((char*)st, opus_encoder_get_size(channels));
+    /* Create SILK encoder */
+    ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
+    if (ret)
+        return OPUS_BAD_ARG;
+    silkEncSizeBytes = align(silkEncSizeBytes);
+    st->silk_enc_offset = align(sizeof(OpusEncoder));
+    st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes;
+    silk_enc = (char*)st+st->silk_enc_offset;
+    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+
+    st->stream_channels = st->channels = channels;
+
+    st->Fs = Fs;
+
+    st->arch = opus_select_arch();
+
+    ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode );
+    if(ret)return OPUS_INTERNAL_ERROR;
+
+    /* default SILK parameters */
+    st->silk_mode.nChannelsAPI              = channels;
+    st->silk_mode.nChannelsInternal         = channels;
+    st->silk_mode.API_sampleRate            = st->Fs;
+    st->silk_mode.maxInternalSampleRate     = 16000;
+    st->silk_mode.minInternalSampleRate     = 8000;
+    st->silk_mode.desiredInternalSampleRate = 16000;
+    st->silk_mode.payloadSize_ms            = 20;
+    st->silk_mode.bitRate                   = 25000;
+    st->silk_mode.packetLossPercentage      = 0;
+    st->silk_mode.complexity                = 9;
+    st->silk_mode.useInBandFEC              = 0;
+    st->silk_mode.useDTX                    = 0;
+    st->silk_mode.useCBR                    = 0;
+    st->silk_mode.reducedDependency         = 0;
+
+    /* Create CELT encoder */
+    /* Initialize CELT encoder */
+    err = celt_encoder_init(celt_enc, Fs, channels, st->arch);
+    if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR;
+
+    celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
+    celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));
+
+    st->use_vbr = 1;
+    /* Makes constrained VBR the default (safer for real-time use) */
+    st->vbr_constraint = 1;
+    st->user_bitrate_bps = OPUS_AUTO;
+    st->bitrate_bps = 3000+Fs*channels;
+    st->application = application;
+    st->signal_type = OPUS_AUTO;
+    st->user_bandwidth = OPUS_AUTO;
+    st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+    st->force_channels = OPUS_AUTO;
+    st->user_forced_mode = OPUS_AUTO;
+    st->voice_ratio = -1;
+    st->encoder_buffer = st->Fs/100;
+    st->lsb_depth = 24;
+    st->variable_duration = OPUS_FRAMESIZE_ARG;
+
+    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 
+       + 1.5 ms for SILK resamplers and stereo prediction) */
+    st->delay_compensation = st->Fs/250;
+
+    st->hybrid_stereo_width_Q14 = 1 << 14;
+    st->prev_HB_gain = Q15ONE;
+    st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+    st->first = 1;
+    st->mode = MODE_HYBRID;
+    st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
+
+    return OPUS_OK;
+}
+
+static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels)
+{
+   int period;
+   unsigned char toc;
+   period = 0;
+   while (framerate < 400)
+   {
+       framerate <<= 1;
+       period++;
+   }
+   if (mode == MODE_SILK_ONLY)
+   {
+       toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5;
+       toc |= (period-2)<<3;
+   } else if (mode == MODE_CELT_ONLY)
+   {
+       int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND;
+       if (tmp < 0)
+           tmp = 0;
+       toc = 0x80;
+       toc |= tmp << 5;
+       toc |= period<<3;
+   } else /* Hybrid */
+   {
+       toc = 0x60;
+       toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4;
+       toc |= (period-2)<<3;
+   }
+   toc |= (channels==2)<<2;
+   return toc;
+}
+
+#ifndef OPUS_FIXED_POINT
+static void silk_biquad_float(
+    const opus_val16      *in,            /* I:    Input signal                   */
+    const opus_int32      *B_Q28,         /* I:    MA coefficients [3]            */
+    const opus_int32      *A_Q28,         /* I:    AR coefficients [2]            */
+    opus_val32            *S,             /* I/O:  State vector [2]               */
+    opus_val16            *out,           /* O:    Output signal                  */
+    const opus_int32      len,            /* I:    Signal length (must be even)   */
+    int stride
+)
+{
+    /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
+    opus_int   k;
+    opus_val32 vout;
+    opus_val32 inval;
+    opus_val32 A[2], B[3];
+
+    A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28)));
+    A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28)));
+    B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28)));
+    B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28)));
+    B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28)));
+
+    /* Negate A_Q28 values and split in two parts */
+
+    for( k = 0; k < len; k++ ) {
+        /* S[ 0 ], S[ 1 ]: Q12 */
+        inval = in[ k*stride ];
+        vout = S[ 0 ] + B[0]*inval;
+
+        S[ 0 ] = S[1] - vout*A[0] + B[1]*inval;
+
+        S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL;
+
+        /* Scale back to Q0 and saturate */
+        out[ k*stride ] = vout;
+    }
+}
+#endif
+
+static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   opus_int32 B_Q28[ 3 ], A_Q28[ 2 ];
+   opus_int32 Fc_Q19, r_Q28, r_Q22;
+
+   silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) );
+   Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 );
+   silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 );
+
+   r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 );
+
+   /* b = r * [ 1; -2; 1 ]; */
+   /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */
+   B_Q28[ 0 ] = r_Q28;
+   B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 );
+   B_Q28[ 2 ] = r_Q28;
+
+   /* -r * ( 2 - Fc * Fc ); */
+   r_Q22  = silk_RSHIFT( r_Q28, 6 );
+   A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0,  22 ) );
+   A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 );
+
+#ifdef OPUS_FIXED_POINT
+   silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels );
+   if( channels == 2 ) {
+       silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels );
+   }
+#else
+   silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels );
+   if( channels == 2 ) {
+       silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels );
+   }
+#endif
+}
+
+#ifdef OPUS_FIXED_POINT
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int c, i;
+   int shift;
+
+   /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */
+   shift=celt_ilog2(Fs/(cutoff_Hz*3));
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x, tmp, y;
+         x = SHL32(EXTEND32(in[channels*i+c]), 15);
+         /* First stage */
+         tmp = x-hp_mem[2*c];
+         hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift);
+         /* Second stage */
+         y = tmp - hp_mem[2*c+1];
+         hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift);
+         out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767));
+      }
+   }
+}
+
+#else
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int c, i;
+   float coef;
+
+   coef = 4.0f*cutoff_Hz/Fs;
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x, tmp, y;
+         x = in[channels*i+c];
+         /* First stage */
+         tmp = x-hp_mem[2*c];
+         hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL;
+         /* Second stage */
+         y = tmp - hp_mem[2*c+1];
+         hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL;
+         out[channels*i+c] = y;
+      }
+   }
+}
+#endif
+
+static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{
+    int i;
+    int overlap;
+    int inc;
+    inc = 48000/Fs;
+    overlap=overlap48/inc;
+    g1 = Q15ONE-g1;
+    g2 = Q15ONE-g2;
+    for (i=0;i<overlap;i++)
+    {
+       opus_val32 diff;
+       opus_val16 g, w;
+       w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+       g = SHR32(MAC16_16(MULT16_16(w,g2),
+             Q15ONE-w, g1), 15);
+       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+       diff = MULT16_16_Q15(g, diff);
+       out[i*channels] = out[i*channels] - diff;
+       out[i*channels+1] = out[i*channels+1] + diff;
+    }
+    for (;i<frame_size;i++)
+    {
+       opus_val32 diff;
+       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
+       diff = MULT16_16_Q15(g2, diff);
+       out[i*channels] = out[i*channels] - diff;
+       out[i*channels+1] = out[i*channels+1] + diff;
+    }
+}
+
+static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
+        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
+{
+    int i;
+    int inc;
+    int overlap;
+    int c;
+    inc = 48000/Fs;
+    overlap=overlap48/inc;
+    if (channels==1)
+    {
+       for (i=0;i<overlap;i++)
+       {
+          opus_val16 g, w;
+          w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+          g = SHR32(MAC16_16(MULT16_16(w,g2),
+                Q15ONE-w, g1), 15);
+          out[i] = MULT16_16_Q15(g, in[i]);
+       }
+    } else {
+       for (i=0;i<overlap;i++)
+       {
+          opus_val16 g, w;
+          w = MULT16_16_Q15(window[i*inc], window[i*inc]);
+          g = SHR32(MAC16_16(MULT16_16(w,g2),
+                Q15ONE-w, g1), 15);
+          out[i*2] = MULT16_16_Q15(g, in[i*2]);
+          out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]);
+       }
+    }
+    c=0;do {
+       for (i=overlap;i<frame_size;i++)
+       {
+          out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]);
+       }
+    }
+    while (++c<channels);
+}
+
+OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
+{
+   int ret;
+   OpusEncoder *st;
+   if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
+       (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
+       && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels));
+   if (st == NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_encoder_init(st, Fs, channels, application);
+   if (error)
+      *error = ret;
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   return st;
+}
+
+static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
+{
+  if(!frame_size)frame_size=st->Fs/400;
+  if (st->user_bitrate_bps==OPUS_AUTO)
+    return 60*st->Fs/frame_size + st->Fs*st->channels;
+  else if (st->user_bitrate_bps==OPUS_BITRATE_MAX)
+    return max_data_bytes*8*st->Fs/frame_size;
+  else
+    return st->user_bitrate_bps;
+}
+
+#ifndef DISABLE_FLOAT_API
+/* Don't use more than 60 ms for the frame size analysis */
+#define MAX_DYNAMIC_FRAMESIZE 24
+/* Estimates how much the bitrate will be boosted based on the sub-frame energy */
+static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
+{
+   int i;
+   int M;
+   float sumE=0, sumE_1=0;
+   float metric;
+
+   M = IMIN(maxM, (1<<LM)+1);
+   for (i=0;i<M;i++)
+   {
+      sumE += E[i];
+      sumE_1 += E_1[i];
+   }
+   metric = sumE*sumE_1/(M*M);
+   /*if (LM==3)
+      printf("%f\n", metric);*/
+   /*return metric>10 ? 1 : 0;*/
+   /*return MAX16(0,1-exp(-.25*(metric-2.)));*/
+   return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2))));
+}
+
+/* Viterbi decoding trying to find the best frame size combination using look-ahead
+
+   State numbering:
+    0: unused
+    1:  2.5 ms
+    2:  5 ms (#1)
+    3:  5 ms (#2)
+    4: 10 ms (#1)
+    5: 10 ms (#2)
+    6: 10 ms (#3)
+    7: 10 ms (#4)
+    8: 20 ms (#1)
+    9: 20 ms (#2)
+   10: 20 ms (#3)
+   11: 20 ms (#4)
+   12: 20 ms (#5)
+   13: 20 ms (#6)
+   14: 20 ms (#7)
+   15: 20 ms (#8)
+*/
+static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
+{
+   int i;
+   float cost[MAX_DYNAMIC_FRAMESIZE][16];
+   int states[MAX_DYNAMIC_FRAMESIZE][16];
+   float best_cost;
+   int best_state;
+   float factor;
+   /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */
+   if (rate<80)
+      factor=0;
+   else if (rate>160)
+      factor=1;
+   else
+      factor = (rate-80.f)/80.f;
+   /* Makes variable framesize less aggressive at lower bitrates, but I can't
+      find any valid theoretical justification for this (other than it seems
+      to help) */
+   for (i=0;i<16;i++)
+   {
+      /* Impossible state */
+      states[0][i] = -1;
+      cost[0][i] = 1e10;
+   }
+   for (i=0;i<4;i++)
+   {
+      cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1));
+      states[0][1<<i] = i;
+   }
+   for (i=1;i<N;i++)
+   {
+      int j;
+
+      /* Follow continuations */
+      for (j=2;j<16;j++)
+      {
+         cost[i][j] = cost[i-1][j-1];
+         states[i][j] = j-1;
+      }
+
+      /* New frames */
+      for(j=0;j<4;j++)
+      {
+         int k;
+         float min_cost;
+         float curr_cost;
+         states[i][1<<j] = 1;
+         min_cost = cost[i-1][1];
+         for(k=1;k<4;k++)
+         {
+            float tmp = cost[i-1][(1<<(k+1))-1];
+            if (tmp < min_cost)
+            {
+               states[i][1<<j] = (1<<(k+1))-1;
+               min_cost = tmp;
+            }
+         }
+         curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1));
+         cost[i][1<<j] = min_cost;
+         /* If part of the frame is outside the analysis window, only count part of the cost */
+         if (N-i < (1<<j))
+            cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
+         else
+            cost[i][1<<j] += curr_cost;
+      }
+   }
+
+   best_state=1;
+   best_cost = cost[N-1][1];
+   /* Find best end state (doesn't force a frame to end at N-1) */
+   for (i=2;i<16;i++)
+   {
+      if (cost[N-1][i]<best_cost)
+      {
+         best_cost = cost[N-1][i];
+         best_state = i;
+      }
+   }
+
+   /* Follow transitions back */
+   for (i=N-1;i>=0;i--)
+   {
+      /*printf("%d ", best_state);*/
+      best_state = states[i][best_state];
+   }
+   /*printf("%d\n", best_state);*/
+   return best_state;
+}
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, float *mem, int buffering,
+                downmix_func downmix)
+{
+   int N;
+   int i;
+   float e[MAX_DYNAMIC_FRAMESIZE+4];
+   float e_1[MAX_DYNAMIC_FRAMESIZE+3];
+   opus_val32 memx;
+   int bestLM=0;
+   int subframe;
+   int pos;
+   VARDECL(opus_val32, sub);
+
+   subframe = Fs/400;
+   ALLOC(sub, subframe, opus_val32);
+   e[0]=mem[0];
+   e_1[0]=1.f/(EPSILON+mem[0]);
+   if (buffering)
+   {
+      /* Consider the CELT delay when not in restricted-lowdelay */
+      /* We assume the buffering is between 2.5 and 5 ms */
+      int offset = 2*subframe - buffering;
+      celt_assert(offset>=0 && offset <= subframe);
+      x += C*offset;
+      len -= offset;
+      e[1]=mem[1];
+      e_1[1]=1.f/(EPSILON+mem[1]);
+      e[2]=mem[2];
+      e_1[2]=1.f/(EPSILON+mem[2]);
+      pos = 3;
+   } else {
+      pos=1;
+   }
+   N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
+   /* Just silencing a warning, it's really initialized later */
+   memx = 0;
+   for (i=0;i<N;i++)
+   {
+      float tmp;
+      opus_val32 tmpx;
+      int j;
+      tmp=EPSILON;
+
+      downmix(x, sub, subframe, i*subframe, 0, -2, C);
+      if (i==0)
+         memx = sub[0];
+      for (j=0;j<subframe;j++)
+      {
+         tmpx = sub[j];
+         tmp += (tmpx-memx)*(float)(tmpx-memx);
+         memx = tmpx;
+      }
+      e[i+pos] = tmp;
+      e_1[i+pos] = 1.f/tmp;
+   }
+   /* Hack to get 20 ms working with APPLICATION_AUDIO
+      The real problem is that the corresponding memory needs to use 1.5 ms
+      from this frame and 1 ms from the next frame */
+   e[i+pos] = e[i+pos-1];
+   if (buffering)
+      N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);
+   bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400);
+   mem[0] = e[1<<bestLM];
+   if (buffering)
+   {
+      mem[1] = e[(1<<bestLM)+1];
+      mem[2] = e[(1<<bestLM)+2];
+   }
+   return bestLM;
+}
+
+#endif
+
+#ifndef DISABLE_FLOAT_API
+#ifdef OPUS_FIXED_POINT
+#define PCM2VAL(x) FLOAT2INT16(x)
+#else
+#define PCM2VAL(x) SCALEIN(x)
+#endif
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const float *x;
+   opus_val32 scale;
+   int j;
+   x = (const float *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = PCM2VAL(x[(j+offset)*C+c1]);
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += PCM2VAL(x[(j+offset)*C+c2]);
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += PCM2VAL(x[(j+offset)*C+c]);
+      }
+   }
+#ifdef OPUS_FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f;
+#endif
+   if (C==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+#endif
+
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+{
+   const opus_int16 *x;
+   opus_val32 scale;
+   int j;
+   x = (const opus_int16 *)_x;
+   for (j=0;j<subframe;j++)
+      sub[j] = x[(j+offset)*C+c1];
+   if (c2>-1)
+   {
+      for (j=0;j<subframe;j++)
+         sub[j] += x[(j+offset)*C+c2];
+   } else if (c2==-2)
+   {
+      int c;
+      for (c=1;c<C;c++)
+      {
+         for (j=0;j<subframe;j++)
+            sub[j] += x[(j+offset)*C+c];
+      }
+   }
+#ifdef OPUS_FIXED_POINT
+   scale = (1<<SIG_SHIFT);
+#else
+   scale = 1.f/32768;
+#endif
+   if (C==-2)
+      scale /= C;
+   else
+      scale /= 2;
+   for (j=0;j<subframe;j++)
+      sub[j] *= scale;
+}
+
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
+{
+   int new_size;
+   if (frame_size<Fs/400)
+      return -1;
+   if (variable_duration == OPUS_FRAMESIZE_ARG)
+      new_size = frame_size;
+   else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
+      new_size = Fs/50;
+   else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS)
+      new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS));
+   else
+      return -1;
+   if (new_size>frame_size)
+      return -1;
+   if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs &&
+            50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs)
+      return -1;
+   return new_size;
+}
+
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix
+#ifndef DISABLE_FLOAT_API
+      , float *subframe_mem
+#endif
+      )
+{
+#ifndef DISABLE_FLOAT_API
+   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   {
+      int LM = 3;
+      LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps,
+            0, subframe_mem, delay_compensation, downmix);
+      while ((Fs/400<<LM)>frame_size)
+         LM--;
+      frame_size = (Fs/400<<LM);
+   } else
+#endif
+   {
+      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+   }
+   if (frame_size<0)
+      return -1;
+   return frame_size;
+}
+
+opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
+{
+   opus_val16 corr;
+   opus_val16 ldiff;
+   opus_val16 width;
+   opus_val32 xx, xy, yy;
+   opus_val16 sqrt_xx, sqrt_yy;
+   opus_val16 qrrt_xx, qrrt_yy;
+   int frame_rate;
+   int i;
+   opus_val16 short_alpha;
+
+   frame_rate = Fs/frame_size;
+   short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate);
+   xx=xy=yy=0;
+   for (i=0;i<frame_size;i+=4)
+   {
+      opus_val32 pxx=0;
+      opus_val32 pxy=0;
+      opus_val32 pyy=0;
+      opus_val16 x, y;
+      x = pcm[2*i];
+      y = pcm[2*i+1];
+      pxx = SHR32(MULT16_16(x,x),2);
+      pxy = SHR32(MULT16_16(x,y),2);
+      pyy = SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+2];
+      y = pcm[2*i+3];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+4];
+      y = pcm[2*i+5];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+      x = pcm[2*i+6];
+      y = pcm[2*i+7];
+      pxx += SHR32(MULT16_16(x,x),2);
+      pxy += SHR32(MULT16_16(x,y),2);
+      pyy += SHR32(MULT16_16(y,y),2);
+
+      xx += SHR32(pxx, 10);
+      xy += SHR32(pxy, 10);
+      yy += SHR32(pyy, 10);
+   }
+   mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
+   mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
+   mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
+   mem->XX = MAX32(0, mem->XX);
+   mem->XY = MAX32(0, mem->XY);
+   mem->YY = MAX32(0, mem->YY);
+   if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
+   {
+      sqrt_xx = celt_sqrt(mem->XX);
+      sqrt_yy = celt_sqrt(mem->YY);
+      qrrt_xx = celt_sqrt(sqrt_xx);
+      qrrt_yy = celt_sqrt(sqrt_yy);
+      /* Inter-channel correlation */
+      mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
+      corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
+      /* Approximate loudness difference */
+      ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy);
+      width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
+      /* Smoothing over one second */
+      mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
+      /* Peak follower */
+      mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
+   } else {
+      width = 0;
+      corr=Q15ONE;
+      ldiff=0;
+   }
+   /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/
+   return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower));
+}
+
+opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix)
+{
+    void *silk_enc;
+    CELTEncoder *celt_enc;
+    int i;
+    int ret=0;
+    opus_int32 nBytes;
+    ec_enc enc;
+    int bytes_target;
+    int prefill=0;
+    int start_band = 0;
+    int redundancy = 0;
+    int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
+    int celt_to_silk = 0;
+    VARDECL(opus_val16, pcm_buf);
+    int nb_compr_bytes;
+    int to_celt = 0;
+    opus_uint32 redundant_rng = 0;
+    int cutoff_Hz, hp_freq_smth1;
+    int voice_est; /* Probability of voice in Q7 */
+    opus_int32 equiv_rate;
+    int delay_compensation;
+    int frame_rate;
+    opus_int32 max_rate; /* Max bitrate we're allowed to use */
+    int curr_bandwidth;
+    opus_val16 HB_gain;
+    opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
+    int total_buffer;
+    opus_val16 stereo_width;
+    const CELTMode *celt_mode;
+    AnalysisInfo analysis_info;
+    int analysis_read_pos_bak=-1;
+    int analysis_read_subframe_bak=-1;
+    VARDECL(opus_val16, tmp_prefill);
+
+    ALLOC_STACK;
+
+    max_data_bytes = IMIN(1276, out_data_bytes);
+
+    st->rangeFinal = 0;
+    if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
+         50*frame_size != st->Fs &&  25*frame_size != st->Fs &&  50*frame_size != 3*st->Fs)
+         || (400*frame_size < st->Fs)
+         || max_data_bytes<=0
+         )
+    {
+       RESTORE_STACK;
+       return OPUS_BAD_ARG;
+    }
+    silk_enc = (char*)st+st->silk_enc_offset;
+    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+       delay_compensation = 0;
+    else
+       delay_compensation = st->delay_compensation;
+
+    lsb_depth = IMIN(lsb_depth, st->lsb_depth);
+
+    analysis_info.valid = 0;
+    celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+#ifndef DISABLE_FLOAT_API
+#ifdef OPUS_FIXED_POINT
+    if (st->silk_mode.complexity >= 10 && st->Fs==48000)
+#else
+    if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+#endif
+    {
+       analysis_read_pos_bak = st->analysis.read_pos;
+       analysis_read_subframe_bak = st->analysis.read_subframe;
+       run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
+             c1, c2, analysis_channels, st->Fs,
+             lsb_depth, downmix, &analysis_info);
+    }
+#endif
+
+    st->voice_ratio = -1;
+
+#ifndef DISABLE_FLOAT_API
+    st->detected_bandwidth = 0;
+    if (analysis_info.valid)
+    {
+       int analysis_bandwidth;
+       if (st->signal_type == OPUS_AUTO)
+          st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
+
+       analysis_bandwidth = analysis_info.bandwidth;
+       if (analysis_bandwidth<=12)
+          st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+       else if (analysis_bandwidth<=14)
+          st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+       else if (analysis_bandwidth<=16)
+          st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+       else if (analysis_bandwidth<=18)
+          st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+       else
+          st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+    }
+#endif
+
+    if (st->channels==2 && st->force_channels!=1)
+       stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
+    else
+       stereo_width = 0;
+    total_buffer = delay_compensation;
+    st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
+
+    frame_rate = st->Fs/frame_size;
+    if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
+       || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
+    {
+       /*If the space is too low to do something useful, emit 'PLC' frames.*/
+       int tocmode = st->mode;
+       int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth;
+       if (tocmode==0)
+          tocmode = MODE_SILK_ONLY;
+       if (frame_rate>100)
+          tocmode = MODE_CELT_ONLY;
+       if (frame_rate < 50)
+          tocmode = MODE_SILK_ONLY;
+       if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND)
+          bw=OPUS_BANDWIDTH_WIDEBAND;
+       else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND)
+          bw=OPUS_BANDWIDTH_NARROWBAND;
+       else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
+          bw=OPUS_BANDWIDTH_SUPERWIDEBAND;
+       data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels);
+       RESTORE_STACK;
+       return 1;
+    }
+    if (!st->use_vbr)
+    {
+       int cbrBytes;
+       cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes);
+       st->bitrate_bps = cbrBytes * (8*frame_rate);
+       max_data_bytes = cbrBytes;
+    }
+    max_rate = frame_rate*max_data_bytes*8;
+
+    /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
+    equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50);
+
+    if (st->signal_type == OPUS_SIGNAL_VOICE)
+       voice_est = 127;
+    else if (st->signal_type == OPUS_SIGNAL_MUSIC)
+       voice_est = 0;
+    else if (st->voice_ratio >= 0)
+    {
+       voice_est = st->voice_ratio*327>>8;
+       /* For AUDIO, never be more than 90% confident of having speech */
+       if (st->application == OPUS_APPLICATION_AUDIO)
+          voice_est = IMIN(voice_est, 115);
+    } else if (st->application == OPUS_APPLICATION_VOIP)
+       voice_est = 115;
+    else
+       voice_est = 48;
+
+    if (st->force_channels!=OPUS_AUTO && st->channels == 2)
+    {
+        st->stream_channels = st->force_channels;
+    } else {
+#ifdef FUZZING
+       /* Random mono/stereo decision */
+       if (st->channels == 2 && (rand()&0x1F)==0)
+          st->stream_channels = 3-st->stream_channels;
+#else
+       /* Rate-dependent mono-stereo decision */
+       if (st->channels == 2)
+       {
+          opus_int32 stereo_threshold;
+          stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
+          if (st->stream_channels == 2)
+             stereo_threshold -= 1000;
+          else
+             stereo_threshold += 1000;
+          st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
+       } else {
+          st->stream_channels = st->channels;
+       }
+#endif
+    }
+    equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50);
+
+    /* Mode selection depending on application and signal type */
+    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+    {
+       st->mode = MODE_CELT_ONLY;
+    } else if (st->user_forced_mode == OPUS_AUTO)
+    {
+#ifdef FUZZING
+       /* Random mode switching */
+       if ((rand()&0xF)==0)
+       {
+          if ((rand()&0x1)==0)
+             st->mode = MODE_CELT_ONLY;
+          else
+             st->mode = MODE_SILK_ONLY;
+       } else {
+          if (st->prev_mode==MODE_CELT_ONLY)
+             st->mode = MODE_CELT_ONLY;
+          else
+             st->mode = MODE_SILK_ONLY;
+       }
+#else
+       opus_int32 mode_voice, mode_music;
+       opus_int32 threshold;
+
+       /* Interpolate based on stereo width */
+       mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0])
+             + MULT16_32_Q15(stereo_width,mode_thresholds[1][0]));
+       mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1])
+             + MULT16_32_Q15(stereo_width,mode_thresholds[1][1]));
+       /* Interpolate based on speech/music probability */
+       threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
+       /* Bias towards SILK for VoIP because of some useful features */
+       if (st->application == OPUS_APPLICATION_VOIP)
+          threshold += 8000;
+
+       /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/
+       /* Hysteresis */
+       if (st->prev_mode == MODE_CELT_ONLY)
+           threshold -= 4000;
+       else if (st->prev_mode>0)
+           threshold += 4000;
+
+       st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
+
+       /* When FEC is enabled and there's enough packet loss, use SILK */
+       if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4)
+          st->mode = MODE_SILK_ONLY;
+       /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */
+       if (st->silk_mode.useDTX && voice_est > 100)
+          st->mode = MODE_SILK_ONLY;
+#endif
+    } else {
+       st->mode = st->user_forced_mode;
+    }
+
+    /* Override the chosen mode to make sure we meet the requested frame size */
+    if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100)
+       st->mode = MODE_CELT_ONLY;
+    if (st->lfe)
+       st->mode = MODE_CELT_ONLY;
+    /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */
+    if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8))
+       st->mode = MODE_CELT_ONLY;
+
+    if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0
+          && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)
+    {
+       /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
+       st->silk_mode.toMono = 1;
+       st->stream_channels = 2;
+    } else {
+       st->silk_mode.toMono = 0;
+    }
+
+    if (st->prev_mode > 0 &&
+        ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ||
+    (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)))
+    {
+        redundancy = 1;
+        celt_to_silk = (st->mode != MODE_CELT_ONLY);
+        if (!celt_to_silk)
+        {
+            /* Switch to SILK/hybrid if frame size is 10 ms or more*/
+            if (frame_size >= st->Fs/100)
+            {
+                st->mode = st->prev_mode;
+                to_celt = 1;
+            } else {
+                redundancy=0;
+            }
+        }
+    }
+    /* For the first frame at a new SILK bandwidth */
+    if (st->silk_bw_switch)
+    {
+       redundancy = 1;
+       celt_to_silk = 1;
+       st->silk_bw_switch = 0;
+       prefill=1;
+    }
+
+    if (redundancy)
+    {
+       /* Fair share of the max size allowed */
+       redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200));
+       /* For VBR, target the actual bitrate (subject to the limit above) */
+       if (st->use_vbr)
+          redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600);
+    }
+
+    if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY)
+    {
+        silk_EncControlStruct dummy;
+        silk_InitEncoder( silk_enc, st->arch, &dummy);
+        prefill=1;
+    }
+
+    /* Automatic (rate-dependent) bandwidth selection */
+    if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
+    {
+        const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
+        opus_int32 bandwidth_thresholds[8];
+        int bandwidth = OPUS_BANDWIDTH_FULLBAND;
+        opus_int32 equiv_rate2;
+
+        equiv_rate2 = equiv_rate;
+        if (st->mode != MODE_CELT_ONLY)
+        {
+           /* Adjust the threshold +/- 10% depending on complexity */
+           equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50;
+           /* CBR is less efficient by ~1 kb/s */
+           if (!st->use_vbr)
+              equiv_rate2 -= 1000;
+        }
+        if (st->channels==2 && st->force_channels!=1)
+        {
+           voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
+           music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
+        } else {
+           voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
+           music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
+        }
+        /* Interpolate bandwidth thresholds depending on voice estimation */
+        for (i=0;i<8;i++)
+        {
+           bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
+                    + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
+        }
+        do {
+            int threshold, hysteresis;
+            threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
+            hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1];
+            if (!st->first)
+            {
+                if (st->bandwidth >= bandwidth)
+                    threshold -= hysteresis;
+                else
+                    threshold += hysteresis;
+            }
+            if (equiv_rate2 >= threshold)
+                break;
+        } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
+        st->bandwidth = bandwidth;
+        /* Prevents any transition to SWB/FB until the SILK layer has fully
+           switched to WB mode and turned the variable LP filter off */
+        if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+            st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+    }
+
+    if (st->bandwidth>st->max_bandwidth)
+       st->bandwidth = st->max_bandwidth;
+
+    if (st->user_bandwidth != OPUS_AUTO)
+        st->bandwidth = st->user_bandwidth;
+
+    /* This prevents us from using hybrid at unsafe CBR/max rates */
+    if (st->mode != MODE_CELT_ONLY && max_rate < 15000)
+    {
+       st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND);
+    }
+
+    /* Prevents Opus from wasting bits on frequencies that are above
+       the Nyquist rate of the input signal */
+    if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND)
+        st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+    if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+        st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+    if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND)
+        st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+    if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND)
+        st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+#ifndef DISABLE_FLOAT_API
+    /* Use detected bandwidth to reduce the encoded bandwidth. */
+    if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO)
+    {
+       int min_detected_bandwidth;
+       /* Makes bandwidth detection more conservative just in case the detector
+          gets it wrong when we could have coded a high bandwidth transparently.
+          When operating in SILK/hybrid mode, we don't go below wideband to avoid
+          more complicated switches that require redundancy. */
+       if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY)
+          min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+       else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY)
+          min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+       else if (equiv_rate <= 30000*st->stream_channels)
+          min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+       else if (equiv_rate <= 44000*st->stream_channels)
+          min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
+       else
+          min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+
+       st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth);
+       st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth);
+    }
+#endif
+    celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth));
+
+    /* CELT mode doesn't support mediumband, use wideband instead */
+    if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+        st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+    if (st->lfe)
+       st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+
+    /* Can't support higher than wideband for >20 ms frames */
+    if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND))
+    {
+       VARDECL(unsigned char, tmp_data);
+       int nb_frames;
+       int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
+       VARDECL(OpusRepacketizer, rp);
+       opus_int32 bytes_per_frame;
+       opus_int32 repacketize_len;
+
+#ifndef DISABLE_FLOAT_API
+       if (analysis_read_pos_bak!= -1)
+       {
+          st->analysis.read_pos = analysis_read_pos_bak;
+          st->analysis.read_subframe = analysis_read_subframe_bak;
+       }
+#endif
+
+       nb_frames = frame_size > st->Fs/25 ? 3 : 2;
+       bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames);
+
+       ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
+
+       ALLOC(rp, 1, OpusRepacketizer);
+       opus_repacketizer_init(rp);
+
+       bak_mode = st->user_forced_mode;
+       bak_bandwidth = st->user_bandwidth;
+       bak_channels = st->force_channels;
+
+       st->user_forced_mode = st->mode;
+       st->user_bandwidth = st->bandwidth;
+       st->force_channels = st->stream_channels;
+       bak_to_mono = st->silk_mode.toMono;
+
+       if (bak_to_mono)
+          st->force_channels = 1;
+       else
+          st->prev_channels = st->stream_channels;
+       for (i=0;i<nb_frames;i++)
+       {
+          int tmp_len;
+          st->silk_mode.toMono = 0;
+          /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
+          if (to_celt && i==nb_frames-1)
+             st->user_forced_mode = MODE_CELT_ONLY;
+          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
+                tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
+                NULL, 0, c1, c2, analysis_channels, downmix);
+          if (tmp_len<0)
+          {
+             RESTORE_STACK;
+             return OPUS_INTERNAL_ERROR;
+          }
+          ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);
+          if (ret<0)
+          {
+             RESTORE_STACK;
+             return OPUS_INTERNAL_ERROR;
+          }
+       }
+       if (st->use_vbr)
+          repacketize_len = out_data_bytes;
+       else
+          repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes);
+       ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
+       if (ret<0)
+       {
+          RESTORE_STACK;
+          return OPUS_INTERNAL_ERROR;
+       }
+       st->user_forced_mode = bak_mode;
+       st->user_bandwidth = bak_bandwidth;
+       st->force_channels = bak_channels;
+       st->silk_mode.toMono = bak_to_mono;
+       RESTORE_STACK;
+       return ret;
+    }
+    curr_bandwidth = st->bandwidth;
+
+    /* Chooses the appropriate mode for speech
+       *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */
+    if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+        st->mode = MODE_HYBRID;
+    if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
+        st->mode = MODE_SILK_ONLY;
+
+    /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */
+    bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1;
+
+    data += 1;
+
+    ec_enc_init(&enc, data, max_data_bytes-1);
+
+    ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
+    for (i=0;i<total_buffer*st->channels;i++)
+       pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
+
+    if (st->mode == MODE_CELT_ONLY)
+       hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+    else
+       hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15;
+
+    st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15,
+          hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) );
+
+    /* convert from log scale to Hertz */
+    cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) );
+
+    if (st->application == OPUS_APPLICATION_VOIP)
+    {
+       hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+    } else {
+       dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+    }
+
+
+
+    /* SILK processing */
+    HB_gain = Q15ONE;
+    if (st->mode != MODE_CELT_ONLY)
+    {
+        opus_int32 total_bitRate, celt_rate;
+#ifdef OPUS_FIXED_POINT
+       const opus_int16 *pcm_silk;
+#else
+       VARDECL(opus_int16, pcm_silk);
+       ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
+#endif
+
+        /* Distribute bits between SILK and CELT */
+        total_bitRate = 8 * bytes_target * frame_rate;
+        if( st->mode == MODE_HYBRID ) {
+            int HB_gain_ref;
+            /* Base rate for SILK */
+            st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) );
+            if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) {
+                /* SILK gets 2/3 of the remaining bits */
+                st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3;
+            } else { /* FULLBAND */
+                /* SILK gets 3/5 of the remaining bits */
+                st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5;
+            }
+            /* Don't let SILK use more than 80% */
+            if( st->silk_mode.bitRate > total_bitRate * 4/5 ) {
+                st->silk_mode.bitRate = total_bitRate * 4/5;
+            }
+            if (!st->energy_masking)
+            {
+               /* Increasingly attenuate high band when it gets allocated fewer bits */
+               celt_rate = total_bitRate - st->silk_mode.bitRate;
+               HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600;
+               HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6);
+               HB_gain = HB_gain < Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
+            }
+        } else {
+            /* SILK gets all bits */
+            st->silk_mode.bitRate = total_bitRate;
+        }
+
+        /* Surround masking for SILK */
+        if (st->energy_masking && st->use_vbr && !st->lfe)
+        {
+           opus_val32 mask_sum=0;
+           opus_val16 masking_depth;
+           opus_int32 rate_offset;
+           int c;
+           int end = 17;
+           opus_int16 srate = 16000;
+           if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
+           {
+              end = 13;
+              srate = 8000;
+           } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+           {
+              end = 15;
+              srate = 12000;
+           }
+           for (c=0;c<st->channels;c++)
+           {
+              for(i=0;i<end;i++)
+              {
+                 opus_val16 mask;
+                 mask = MAX16(MIN16(st->energy_masking[21*c+i],
+                        QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
+                 if (mask > 0)
+                    mask = HALF16(mask);
+                 mask_sum += mask;
+              }
+           }
+           /* Conservative rate reduction, we cut the masking in half */
+           masking_depth = mask_sum / end*st->channels;
+           masking_depth += QCONST16(.2f, DB_SHIFT);
+           rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);
+           rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);
+           /* Split the rate change between the SILK and CELT part for hybrid. */
+           if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND)
+              st->silk_mode.bitRate += 3*rate_offset/5;
+           else
+              st->silk_mode.bitRate += rate_offset;
+           bytes_target += rate_offset * frame_size / (8 * st->Fs);
+        }
+
+        st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
+        st->silk_mode.nChannelsAPI = st->channels;
+        st->silk_mode.nChannelsInternal = st->stream_channels;
+        if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+            st->silk_mode.desiredInternalSampleRate = 8000;
+        } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+            st->silk_mode.desiredInternalSampleRate = 12000;
+        } else {
+            silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND );
+            st->silk_mode.desiredInternalSampleRate = 16000;
+        }
+        if( st->mode == MODE_HYBRID ) {
+            /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */
+            st->silk_mode.minInternalSampleRate = 16000;
+        } else {
+            st->silk_mode.minInternalSampleRate = 8000;
+        }
+
+        if (st->mode == MODE_SILK_ONLY)
+        {
+           opus_int32 effective_max_rate = max_rate;
+           st->silk_mode.maxInternalSampleRate = 16000;
+           if (frame_rate > 50)
+              effective_max_rate = effective_max_rate*2/3;
+           if (effective_max_rate < 13000)
+           {
+              st->silk_mode.maxInternalSampleRate = 12000;
+              st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate);
+           }
+           if (effective_max_rate < 9600)
+           {
+              st->silk_mode.maxInternalSampleRate = 8000;
+              st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate);
+           }
+        } else {
+           st->silk_mode.maxInternalSampleRate = 16000;
+        }
+
+        st->silk_mode.useCBR = !st->use_vbr;
+
+        /* Call SILK encoder for the low band */
+        nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes);
+
+        st->silk_mode.maxBits = nBytes*8;
+        /* Only allow up to 90% of the bits for hybrid mode*/
+        if (st->mode == MODE_HYBRID)
+           st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10;
+        if (st->silk_mode.useCBR)
+        {
+           st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8;
+           /* Reduce the initial target to make it easier to reach the CBR rate */
+           st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000);
+        }
+
+        if (prefill)
+        {
+            opus_int32 zero=0;
+            int prefill_offset;
+            /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode
+               a discontinuity. The exact location is what we need to avoid leaving any "gap"
+               in the audio when mixing with the redundant CELT frame. Here we can afford to
+               overwrite st->delay_buffer because the only thing that uses it before it gets
+               rewritten is tmp_prefill[] and even then only the part after the ramp really
+               gets used (rather than sent to the encoder and discarded) */
+            prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
+            gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
+                  0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
+            for(i=0;i<prefill_offset;i++)
+               st->delay_buffer[i]=0;
+#ifdef OPUS_FIXED_POINT
+            pcm_silk = st->delay_buffer;
+#else
+            for (i=0;i<st->encoder_buffer*st->channels;i++)
+                pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]);
+#endif
+            silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 );
+        }
+
+#ifdef OPUS_FIXED_POINT
+        pcm_silk = pcm_buf+total_buffer*st->channels;
+#else
+        for (i=0;i<frame_size*st->channels;i++)
+            pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
+#endif
+        ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 );
+        if( ret ) {
+            /*fprintf (stderr, "SILK encode error: %d\n", ret);*/
+            /* Handle error */
+           RESTORE_STACK;
+           return OPUS_INTERNAL_ERROR;
+        }
+        if (nBytes==0)
+        {
+           st->rangeFinal = 0;
+           data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+           RESTORE_STACK;
+           return 1;
+        }
+        /* Extract SILK internal bandwidth for signaling in first byte */
+        if( st->mode == MODE_SILK_ONLY ) {
+            if( st->silk_mode.internalSampleRate == 8000 ) {
+               curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+            } else if( st->silk_mode.internalSampleRate == 12000 ) {
+               curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+            } else if( st->silk_mode.internalSampleRate == 16000 ) {
+               curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+            }
+        } else {
+            silk_assert( st->silk_mode.internalSampleRate == 16000 );
+        }
+
+        st->silk_mode.opusCanSwitch = st->silk_mode.switchReady;
+        /* FIXME: How do we allocate the redundancy for CBR? */
+        if (st->silk_mode.opusCanSwitch)
+        {
+           redundancy = 1;
+           celt_to_silk = 0;
+           st->silk_bw_switch = 1;
+        }
+    }
+
+    /* CELT processing */
+    {
+        int endband=21;
+
+        switch(curr_bandwidth)
+        {
+            case OPUS_BANDWIDTH_NARROWBAND:
+                endband = 13;
+                break;
+            case OPUS_BANDWIDTH_MEDIUMBAND:
+            case OPUS_BANDWIDTH_WIDEBAND:
+                endband = 17;
+                break;
+            case OPUS_BANDWIDTH_SUPERWIDEBAND:
+                endband = 19;
+                break;
+            case OPUS_BANDWIDTH_FULLBAND:
+                endband = 21;
+                break;
+        }
+        celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband));
+        celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels));
+    }
+    celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX));
+    if (st->mode != MODE_SILK_ONLY)
+    {
+        opus_val32 celt_pred=2;
+        celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
+        /* We may still decide to disable prediction later */
+        if (st->silk_mode.reducedDependency)
+           celt_pred = 0;
+        celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
+
+        if (st->mode == MODE_HYBRID)
+        {
+            int len;
+
+            len = (ec_tell(&enc)+7)>>3;
+            if (redundancy)
+               len += st->mode == MODE_HYBRID ? 3 : 1;
+            if( st->use_vbr ) {
+                nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs);
+            } else {
+                /* check if SILK used up too much */
+                nb_compr_bytes = len > bytes_target ? len : bytes_target;
+            }
+        } else {
+            if (st->use_vbr)
+            {
+                opus_int32 bonus=0;
+#ifndef DISABLE_FLOAT_API
+                if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
+                {
+                   bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);
+                   if (analysis_info.valid)
+                      bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality));
+                }
+#endif
+                celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+                celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
+                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));
+                nb_compr_bytes = max_data_bytes-1-redundancy_bytes;
+            } else {
+                nb_compr_bytes = bytes_target;
+            }
+        }
+
+    } else {
+        nb_compr_bytes = 0;
+    }
+
+    ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
+    if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
+    {
+       for (i=0;i<st->channels*st->Fs/400;i++)
+          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+    }
+
+    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
+        st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
+    for (;i<st->encoder_buffer*st->channels;i++)
+        st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
+
+    /* gain_fade() and stereo_fade() need to be after the buffer copying
+       because we don't want any of this to affect the SILK part */
+    if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
+       gain_fade(pcm_buf, pcm_buf,
+             st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
+    }
+    st->prev_HB_gain = HB_gain;
+    if (st->mode != MODE_HYBRID || st->stream_channels==1)
+       st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000));
+    if( !st->energy_masking && st->channels == 2 ) {
+        /* Apply stereo width reduction (at low bitrates) */
+        if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
+            opus_val16 g1, g2;
+            g1 = st->hybrid_stereo_width_Q14;
+            g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);
+#ifdef OPUS_FIXED_POINT
+            g1 = g1==16384 ? Q15ONE : SHL16(g1,1);
+            g2 = g2==16384 ? Q15ONE : SHL16(g2,1);
+#else
+            g1 *= (1.f/16384);
+            g2 *= (1.f/16384);
+#endif
+            stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
+                  frame_size, st->channels, celt_mode->window, st->Fs);
+            st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
+        }
+    }
+
+    if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1))
+    {
+        /* For SILK mode, the redundancy is inferred from the length */
+        if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes))
+           ec_enc_bit_logp(&enc, redundancy, 12);
+        if (redundancy)
+        {
+            int max_redundancy;
+            ec_enc_bit_logp(&enc, celt_to_silk, 1);
+            if (st->mode == MODE_HYBRID)
+               max_redundancy = (max_data_bytes-1)-nb_compr_bytes;
+            else
+               max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3);
+            /* Target the same bit-rate for redundancy as for the rest,
+               up to a max of 257 bytes */
+            redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600);
+            redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes));
+            if (st->mode == MODE_HYBRID)
+                ec_enc_uint(&enc, redundancy_bytes-2, 256);
+        }
+    } else {
+        redundancy = 0;
+    }
+
+    if (!redundancy)
+    {
+       st->silk_bw_switch = 0;
+       redundancy_bytes = 0;
+    }
+    if (st->mode != MODE_CELT_ONLY)start_band=17;
+
+    if (st->mode == MODE_SILK_ONLY)
+    {
+        ret = (ec_tell(&enc)+7)>>3;
+        ec_enc_done(&enc);
+        nb_compr_bytes = ret;
+    } else {
+       nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes);
+       ec_enc_shrink(&enc, nb_compr_bytes);
+    }
+
+#ifndef DISABLE_FLOAT_API
+    if (redundancy || st->mode != MODE_SILK_ONLY)
+       celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+#endif
+
+    /* 5 ms redundant frame for CELT->SILK */
+    if (redundancy && celt_to_silk)
+    {
+        int err;
+        celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
+        celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
+        err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
+        if (err < 0)
+        {
+           RESTORE_STACK;
+           return OPUS_INTERNAL_ERROR;
+        }
+        celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
+        celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+    }
+
+    celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band));
+
+    if (st->mode != MODE_SILK_ONLY)
+    {
+        if (st->mode != st->prev_mode && st->prev_mode > 0)
+        {
+           unsigned char dummy[2];
+           celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+
+           /* Prefilling */
+           celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL);
+           celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
+        }
+        /* If false, we already busted the budget and we'll end up with a "PLC packet" */
+        if (ec_tell(&enc) <= 8*nb_compr_bytes)
+        {
+           ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
+           if (ret < 0)
+           {
+              RESTORE_STACK;
+              return OPUS_INTERNAL_ERROR;
+           }
+        }
+    }
+
+    /* 5 ms redundant frame for SILK->CELT */
+    if (redundancy && !celt_to_silk)
+    {
+        int err;
+        unsigned char dummy[2];
+        int N2, N4;
+        N2 = st->Fs/200;
+        N4 = st->Fs/400;
+
+        celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+        celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
+        celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
+
+        /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
+        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
+
+        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
+        if (err < 0)
+        {
+           RESTORE_STACK;
+           return OPUS_INTERNAL_ERROR;
+        }
+        celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
+    }
+
+
+
+    /* Signalling the mode in the first byte */
+    data--;
+    data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
+
+    st->rangeFinal = enc.rng ^ redundant_rng;
+
+    if (to_celt)
+        st->prev_mode = MODE_CELT_ONLY;
+    else
+        st->prev_mode = st->mode;
+    st->prev_channels = st->stream_channels;
+    st->prev_framesize = frame_size;
+
+    st->first = 0;
+
+    /* In the unlikely case that the SILK encoder busted its target, tell
+       the decoder to call the PLC */
+    if (ec_tell(&enc) > (max_data_bytes-1)*8)
+    {
+       if (max_data_bytes < 2)
+       {
+          RESTORE_STACK;
+          return OPUS_BUFFER_TOO_SMALL;
+       }
+       data[1] = 0;
+       ret = 1;
+       st->rangeFinal = 0;
+    } else if (st->mode==MODE_SILK_ONLY&&!redundancy)
+    {
+       /*When in LPC only mode it's perfectly
+         reasonable to strip off trailing zero bytes as
+         the required range decoder behavior is to
+         fill these in. This can't be done when the MDCT
+         modes are used because the decoder needs to know
+         the actual length for allocation purposes.*/
+       while(ret>2&&data[ret]==0)ret--;
+    }
+    /* Count ToC and redundancy */
+    ret += 1+redundancy_bytes;
+    if (!st->use_vbr)
+    {
+       if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
+
+       {
+          RESTORE_STACK;
+          return OPUS_INTERNAL_ERROR;
+       }
+       ret = max_data_bytes;
+    }
+    RESTORE_STACK;
+    return ret;
+}
+
+#ifdef OPUS_FIXED_POINT
+
+#ifndef DISABLE_FLOAT_API
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
+      unsigned char *data, opus_int32 max_data_bytes)
+{
+   int i, ret;
+   int frame_size;
+   int delay_compensation;
+   VARDECL(opus_int16, in);
+   ALLOC_STACK;
+
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+
+   ALLOC(in, frame_size*st->channels, opus_int16);
+
+   for (i=0;i<frame_size*st->channels;i++)
+      in[i] = FLOAT2INT16(pcm[i]);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+   RESTORE_STACK;
+   return ret;
+}
+#endif
+
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
+                unsigned char *data, opus_int32 out_data_bytes)
+{
+   int frame_size;
+   int delay_compensation;
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_int
+#ifndef DISABLE_FLOAT_API
+         , st->analysis.subframe_mem
+#endif
+         );
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+}
+
+#else
+opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
+      unsigned char *data, opus_int32 max_data_bytes)
+{
+   int i, ret;
+   int frame_size;
+   int delay_compensation;
+   VARDECL(float, in);
+   ALLOC_STACK;
+
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_int, st->analysis.subframe_mem);
+
+   ALLOC(in, frame_size*st->channels, float);
+
+   for (i=0;i<frame_size*st->channels;i++)
+      in[i] = (1.0f/32768)*pcm[i];
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int);
+   RESTORE_STACK;
+   return ret;
+}
+opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
+                      unsigned char *data, opus_int32 out_data_bytes)
+{
+   int frame_size;
+   int delay_compensation;
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+   frame_size = compute_frame_size(pcm, analysis_frame_size,
+         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
+         delay_compensation, downmix_float, st->analysis.subframe_mem);
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24,
+                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float);
+}
+#endif
+
+
+int opus_encoder_ctl(OpusEncoder *st, int request, ...)
+{
+    int ret;
+    CELTEncoder *celt_enc;
+    va_list ap;
+
+    ret = OPUS_OK;
+    va_start(ap, request);
+
+    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+
+    switch (request)
+    {
+        case OPUS_SET_APPLICATION_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (   (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO
+                 && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+               || (!st->first && st->application != value))
+            {
+               ret = OPUS_BAD_ARG;
+               break;
+            }
+            st->application = value;
+        }
+        break;
+        case OPUS_GET_APPLICATION_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->application;
+        }
+        break;
+        case OPUS_SET_BITRATE_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX)
+            {
+                if (value <= 0)
+                    goto bad_arg;
+                else if (value <= 500)
+                    value = 500;
+                else if (value > (opus_int32)300000*st->channels)
+                    value = (opus_int32)300000*st->channels;
+            }
+            st->user_bitrate_bps = value;
+        }
+        break;
+        case OPUS_GET_BITRATE_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276);
+        }
+        break;
+        case OPUS_SET_FORCE_CHANNELS_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if((value<1 || value>st->channels) && value != OPUS_AUTO)
+            {
+               goto bad_arg;
+            }
+            st->force_channels = value;
+        }
+        break;
+        case OPUS_GET_FORCE_CHANNELS_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->force_channels;
+        }
+        break;
+        case OPUS_SET_MAX_BANDWIDTH_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) 
+            {
+               goto bad_arg;
+            }
+            st->max_bandwidth = value;
+            if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+                st->silk_mode.maxInternalSampleRate = 8000;
+            } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+                st->silk_mode.maxInternalSampleRate = 12000;
+            } else {
+                st->silk_mode.maxInternalSampleRate = 16000;
+            }
+        }
+        break;
+        case OPUS_GET_MAX_BANDWIDTH_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->max_bandwidth;
+        }
+        break;
+        case OPUS_SET_BANDWIDTH_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO)
+            {
+               goto bad_arg;
+            }
+            st->user_bandwidth = value;
+            if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+                st->silk_mode.maxInternalSampleRate = 8000;
+            } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+                st->silk_mode.maxInternalSampleRate = 12000;
+            } else {
+                st->silk_mode.maxInternalSampleRate = 16000;
+            }
+        }
+        break;
+        case OPUS_GET_BANDWIDTH_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->bandwidth;
+        }
+        break;
+        case OPUS_SET_DTX_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.useDTX = value;
+        }
+        break;
+        case OPUS_GET_DTX_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.useDTX;
+        }
+        break;
+        case OPUS_SET_COMPLEXITY_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>10)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.complexity = value;
+            celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value));
+        }
+        break;
+        case OPUS_GET_COMPLEXITY_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.complexity;
+        }
+        break;
+        case OPUS_SET_INBAND_FEC_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.useInBandFEC = value;
+        }
+        break;
+        case OPUS_GET_INBAND_FEC_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.useInBandFEC;
+        }
+        break;
+        case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value < 0 || value > 100)
+            {
+               goto bad_arg;
+            }
+            st->silk_mode.packetLossPercentage = value;
+            celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value));
+        }
+        break;
+        case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->silk_mode.packetLossPercentage;
+        }
+        break;
+        case OPUS_SET_VBR_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->use_vbr = value;
+            st->silk_mode.useCBR = 1-value;
+        }
+        break;
+        case OPUS_GET_VBR_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->use_vbr;
+        }
+        break;
+        case OPUS_SET_VOICE_RATIO_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value<-1 || value>100)
+            {
+               goto bad_arg;
+            }
+            st->voice_ratio = value;
+        }
+        break;
+        case OPUS_GET_VOICE_RATIO_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->voice_ratio;
+        }
+        break;
+        case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value<0 || value>1)
+            {
+               goto bad_arg;
+            }
+            st->vbr_constraint = value;
+        }
+        break;
+        case OPUS_GET_VBR_CONSTRAINT_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->vbr_constraint;
+        }
+        break;
+        case OPUS_SET_SIGNAL_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC)
+            {
+               goto bad_arg;
+            }
+            st->signal_type = value;
+        }
+        break;
+        case OPUS_GET_SIGNAL_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->signal_type;
+        }
+        break;
+        case OPUS_GET_LOOKAHEAD_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->Fs/400;
+            if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+                *value += st->delay_compensation;
+        }
+        break;
+        case OPUS_GET_SAMPLE_RATE_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->Fs;
+        }
+        break;
+        case OPUS_GET_FINAL_RANGE_REQUEST:
+        {
+            opus_uint32 *value = va_arg(ap, opus_uint32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->rangeFinal;
+        }
+        break;
+        case OPUS_SET_LSB_DEPTH_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value<8 || value>24)
+            {
+               goto bad_arg;
+            }
+            st->lsb_depth=value;
+        }
+        break;
+        case OPUS_GET_LSB_DEPTH_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->lsb_depth;
+        }
+        break;
+        case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if (value != OPUS_FRAMESIZE_ARG   && value != OPUS_FRAMESIZE_2_5_MS &&
+                value != OPUS_FRAMESIZE_5_MS  && value != OPUS_FRAMESIZE_10_MS  &&
+                value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS  &&
+                value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE)
+            {
+               goto bad_arg;
+            }
+            st->variable_duration = value;
+            celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value));
+        }
+        break;
+        case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            if (!value)
+            {
+               goto bad_arg;
+            }
+            *value = st->variable_duration;
+        }
+        break;
+        case OPUS_SET_PREDICTION_DISABLED_REQUEST:
+        {
+           opus_int32 value = va_arg(ap, opus_int32);
+           if (value > 1 || value < 0)
+              goto bad_arg;
+           st->silk_mode.reducedDependency = value;
+        }
+        break;
+        case OPUS_GET_PREDICTION_DISABLED_REQUEST:
+        {
+           opus_int32 *value = va_arg(ap, opus_int32*);
+           if (!value)
+              goto bad_arg;
+           *value = st->silk_mode.reducedDependency;
+        }
+        break;
+        case OPUS_RESET_STATE:
+        {
+           void *silk_enc;
+           silk_EncControlStruct dummy;
+           silk_enc = (char*)st+st->silk_enc_offset;
+
+           OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START,
+                 sizeof(OpusEncoder)-
+                 ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st));
+
+           celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
+           silk_InitEncoder( silk_enc, st->arch, &dummy );
+           st->stream_channels = st->channels;
+           st->hybrid_stereo_width_Q14 = 1 << 14;
+           st->prev_HB_gain = Q15ONE;
+           st->first = 1;
+           st->mode = MODE_HYBRID;
+           st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
+           st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
+        }
+        break;
+        case OPUS_SET_FORCE_MODE_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO)
+            {
+               goto bad_arg;
+            }
+            st->user_forced_mode = value;
+        }
+        break;
+        case OPUS_SET_LFE_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            st->lfe = value;
+            ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));
+        }
+        break;
+        case OPUS_SET_ENERGY_MASK_REQUEST:
+        {
+            opus_val16 *value = va_arg(ap, opus_val16*);
+            st->energy_masking = value;
+            ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));
+        }
+        break;
+
+        case CELT_GET_MODE_REQUEST:
+        {
+           const CELTMode ** value = va_arg(ap, const CELTMode**);
+           if (!value)
+           {
+              goto bad_arg;
+           }
+           ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
+        }
+        break;
+        default:
+            /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/
+            ret = OPUS_UNIMPLEMENTED;
+            break;
+    }
+    va_end(ap);
+    return ret;
+bad_arg:
+    va_end(ap);
+    return OPUS_BAD_ARG;
+}
+
+void opus_encoder_destroy(OpusEncoder *st)
+{
+    opus_free(st);
+}
diff --git a/drivers/opus/opus_multistream.c b/drivers/opus/opus_multistream.c
new file mode 100644
index 0000000000..8211c0b470
--- /dev/null
+++ b/drivers/opus/opus_multistream.c
@@ -0,0 +1,92 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+
+
+int validate_layout(const ChannelLayout *layout)
+{
+   int i, max_channel;
+
+   max_channel = layout->nb_streams+layout->nb_coupled_streams;
+   if (max_channel>255)
+      return 0;
+   for (i=0;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i] >= max_channel && layout->mapping[i] != 255)
+         return 0;
+   }
+   return 1;
+}
+
+
+int get_left_channel(const ChannelLayout *layout, int stream_id, int prev)
+{
+   int i;
+   i = (prev<0) ? 0 : prev+1;
+   for (;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i]==stream_id*2)
+         return i;
+   }
+   return -1;
+}
+
+int get_right_channel(const ChannelLayout *layout, int stream_id, int prev)
+{
+   int i;
+   i = (prev<0) ? 0 : prev+1;
+   for (;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i]==stream_id*2+1)
+         return i;
+   }
+   return -1;
+}
+
+int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev)
+{
+   int i;
+   i = (prev<0) ? 0 : prev+1;
+   for (;i<layout->nb_channels;i++)
+   {
+      if (layout->mapping[i]==stream_id+layout->nb_coupled_streams)
+         return i;
+   }
+   return -1;
+}
+
diff --git a/drivers/opus/opus_multistream.h b/drivers/opus/opus_multistream.h
new file mode 100644
index 0000000000..ae5997934a
--- /dev/null
+++ b/drivers/opus/opus_multistream.h
@@ -0,0 +1,660 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/**
+ * @file opus_multistream.h
+ * @brief Opus reference implementation multistream API
+ */
+
+#ifndef OPUS_MULTISTREAM_H
+#define OPUS_MULTISTREAM_H
+
+#include "opus.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @cond OPUS_INTERNAL_DOC */
+
+/** Macros to trigger compilation errors when the wrong types are provided to a
+  * CTL. */
+/**@{*/
+#define __opus_check_encstate_ptr(ptr) ((ptr) + ((ptr) - (OpusEncoder**)(ptr)))
+#define __opus_check_decstate_ptr(ptr) ((ptr) + ((ptr) - (OpusDecoder**)(ptr)))
+/**@}*/
+
+/** These are the actual encoder and decoder CTL ID numbers.
+  * They should not be used directly by applications.
+  * In general, SETs should be even and GETs should be odd.*/
+/**@{*/
+#define OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST 5120
+#define OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST 5122
+/**@}*/
+
+/** @endcond */
+
+/** @defgroup opus_multistream_ctls Multistream specific encoder and decoder CTLs
+  *
+  * These are convenience macros that are specific to the
+  * opus_multistream_encoder_ctl() and opus_multistream_decoder_ctl()
+  * interface.
+  * The CTLs from @ref opus_genericctls, @ref opus_encoderctls, and
+  * @ref opus_decoderctls may be applied to a multistream encoder or decoder as
+  * well.
+  * In addition, you may retrieve the encoder or decoder state for an specific
+  * stream via #OPUS_MULTISTREAM_GET_ENCODER_STATE or
+  * #OPUS_MULTISTREAM_GET_DECODER_STATE and apply CTLs to it individually.
+  */
+/**@{*/
+
+/** Gets the encoder state for an individual stream of a multistream encoder.
+  * @param[in] x <tt>opus_int32</tt>: The index of the stream whose encoder you
+  *                                   wish to retrieve.
+  *                                   This must be non-negative and less than
+  *                                   the <code>streams</code> parameter used
+  *                                   to initialize the encoder.
+  * @param[out] y <tt>OpusEncoder**</tt>: Returns a pointer to the given
+  *                                       encoder state.
+  * @retval OPUS_BAD_ARG The index of the requested stream was out of range.
+  * @hideinitializer
+  */
+#define OPUS_MULTISTREAM_GET_ENCODER_STATE(x,y) OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST, __opus_check_int(x), __opus_check_encstate_ptr(y)
+
+/** Gets the decoder state for an individual stream of a multistream decoder.
+  * @param[in] x <tt>opus_int32</tt>: The index of the stream whose decoder you
+  *                                   wish to retrieve.
+  *                                   This must be non-negative and less than
+  *                                   the <code>streams</code> parameter used
+  *                                   to initialize the decoder.
+  * @param[out] y <tt>OpusDecoder**</tt>: Returns a pointer to the given
+  *                                       decoder state.
+  * @retval OPUS_BAD_ARG The index of the requested stream was out of range.
+  * @hideinitializer
+  */
+#define OPUS_MULTISTREAM_GET_DECODER_STATE(x,y) OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST, __opus_check_int(x), __opus_check_decstate_ptr(y)
+
+/**@}*/
+
+/** @defgroup opus_multistream Opus Multistream API
+  * @{
+  *
+  * The multistream API allows individual Opus streams to be combined into a
+  * single packet, enabling support for up to 255 channels. Unlike an
+  * elementary Opus stream, the encoder and decoder must negotiate the channel
+  * configuration before the decoder can successfully interpret the data in the
+  * packets produced by the encoder. Some basic information, such as packet
+  * duration, can be computed without any special negotiation.
+  *
+  * The format for multistream Opus packets is defined in the
+  * <a href="http://tools.ietf.org/html/draft-terriberry-oggopus">Ogg
+  * encapsulation specification</a> and is based on the self-delimited Opus
+  * framing described in Appendix B of <a href="http://tools.ietf.org/html/rfc6716">RFC 6716</a>.
+  * Normal Opus packets are just a degenerate case of multistream Opus packets,
+  * and can be encoded or decoded with the multistream API by setting
+  * <code>streams</code> to <code>1</code> when initializing the encoder or
+  * decoder.
+  *
+  * Multistream Opus streams can contain up to 255 elementary Opus streams.
+  * These may be either "uncoupled" or "coupled", indicating that the decoder
+  * is configured to decode them to either 1 or 2 channels, respectively.
+  * The streams are ordered so that all coupled streams appear at the
+  * beginning.
+  *
+  * A <code>mapping</code> table defines which decoded channel <code>i</code>
+  * should be used for each input/output (I/O) channel <code>j</code>. This table is
+  * typically provided as an unsigned char array.
+  * Let <code>i = mapping[j]</code> be the index for I/O channel <code>j</code>.
+  * If <code>i < 2*coupled_streams</code>, then I/O channel <code>j</code> is
+  * encoded as the left channel of stream <code>(i/2)</code> if <code>i</code>
+  * is even, or  as the right channel of stream <code>(i/2)</code> if
+  * <code>i</code> is odd. Otherwise, I/O channel <code>j</code> is encoded as
+  * mono in stream <code>(i - coupled_streams)</code>, unless it has the special
+  * value 255, in which case it is omitted from the encoding entirely (the
+  * decoder will reproduce it as silence). Each value <code>i</code> must either
+  * be the special value 255 or be less than <code>streams + coupled_streams</code>.
+  *
+  * The output channels specified by the encoder
+  * should use the
+  * <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+  * channel ordering</a>. A decoder may wish to apply an additional permutation
+  * to the mapping the encoder used to achieve a different output channel
+  * order (e.g. for outputing in WAV order).
+  *
+  * Each multistream packet contains an Opus packet for each stream, and all of
+  * the Opus packets in a single multistream packet must have the same
+  * duration. Therefore the duration of a multistream packet can be extracted
+  * from the TOC sequence of the first stream, which is located at the
+  * beginning of the packet, just like an elementary Opus stream:
+  *
+  * @code
+  * int nb_samples;
+  * int nb_frames;
+  * nb_frames = opus_packet_get_nb_frames(data, len);
+  * if (nb_frames < 1)
+  *   return nb_frames;
+  * nb_samples = opus_packet_get_samples_per_frame(data, 48000) * nb_frames;
+  * @endcode
+  *
+  * The general encoding and decoding process proceeds exactly the same as in
+  * the normal @ref opus_encoder and @ref opus_decoder APIs.
+  * See their documentation for an overview of how to use the corresponding
+  * multistream functions.
+  */
+
+/** Opus multistream encoder state.
+  * This contains the complete state of a multistream Opus encoder.
+  * It is position independent and can be freely copied.
+  * @see opus_multistream_encoder_create
+  * @see opus_multistream_encoder_init
+  */
+typedef struct OpusMSEncoder OpusMSEncoder;
+
+/** Opus multistream decoder state.
+  * This contains the complete state of a multistream Opus decoder.
+  * It is position independent and can be freely copied.
+  * @see opus_multistream_decoder_create
+  * @see opus_multistream_decoder_init
+  */
+typedef struct OpusMSDecoder OpusMSDecoder;
+
+/**\name Multistream encoder functions */
+/**@{*/
+
+/** Gets the size of an OpusMSEncoder structure.
+  * @param streams <tt>int</tt>: The total number of streams to encode from the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+  *                                      to encode.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      encoded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @returns The size in bytes on success, or a negative error code
+  *          (see @ref opus_errorcodes) on error.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_encoder_get_size(
+      int streams,
+      int coupled_streams
+);
+
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_surround_encoder_get_size(
+      int channels,
+      int mapping_family
+);
+
+
+/** Allocates and initializes a multistream encoder state.
+  * Call opus_multistream_encoder_destroy() to release
+  * this object when finished.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels in the input signal.
+  *                               This must be at most 255.
+  *                               It may be greater than the number of
+  *                               coded channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams to encode from the
+  *                              input.
+  *                              This must be no more than the number of channels.
+  * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+  *                                      to encode.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      encoded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than the number of input channels.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    encoded channels to input channels, as described in
+  *                    @ref opus_multistream. As an extra constraint, the
+  *                    multistream encoder does not allow encoding coupled
+  *                    streams for which one channel is unused since this
+  *                    is never a good idea.
+  * @param application <tt>int</tt>: The target encoder application.
+  *                                  This must be one of the following:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+  *                                   code (see @ref opus_errorcodes) on
+  *                                   failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int *error
+) OPUS_ARG_NONNULL(5);
+
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application,
+      int *error
+) OPUS_ARG_NONNULL(5);
+
+/** Initialize a previously allocated multistream encoder state.
+  * The memory pointed to by \a st must be at least the size returned by
+  * opus_multistream_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of
+  * malloc.
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @see opus_multistream_encoder_create
+  * @see opus_multistream_encoder_get_size
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels in the input signal.
+  *                               This must be at most 255.
+  *                               It may be greater than the number of
+  *                               coded channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams to encode from the
+  *                              input.
+  *                              This must be no more than the number of channels.
+  * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams
+  *                                      to encode.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      encoded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than the number of input channels.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    encoded channels to input channels, as described in
+  *                    @ref opus_multistream. As an extra constraint, the
+  *                    multistream encoder does not allow encoding coupled
+  *                    streams for which one channel is unused since this
+  *                    is never a good idea.
+  * @param application <tt>int</tt>: The target encoder application.
+  *                                  This must be one of the following:
+  * <dl>
+  * <dt>#OPUS_APPLICATION_VOIP</dt>
+  * <dd>Process signal for improved speech intelligibility.</dd>
+  * <dt>#OPUS_APPLICATION_AUDIO</dt>
+  * <dd>Favor faithfulness to the original input.</dd>
+  * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt>
+  * <dd>Configure the minimum possible coding delay by disabling certain modes
+  * of operation.</dd>
+  * </dl>
+  * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+  *          on failure.
+  */
+OPUS_EXPORT int opus_multistream_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+OPUS_EXPORT int opus_multistream_surround_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+/** Encodes a multistream Opus frame.
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+  * @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved
+  *                                            samples.
+  *                                            This must contain
+  *                                            <code>frame_size*channels</code>
+  *                                            samples.
+  * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+  *                                 signal.
+  *                                 This must be an Opus frame size for the
+  *                                 encoder's sampling rate.
+  *                                 For example, at 48 kHz the permitted values
+  *                                 are 120, 240, 480, 960, 1920, and 2880.
+  *                                 Passing in a duration of less than 10 ms
+  *                                 (480 samples at 48 kHz) will prevent the
+  *                                 encoder from using the LPC or hybrid modes.
+  * @param[out] data <tt>unsigned char*</tt>: Output payload.
+  *                                           This must contain storage for at
+  *                                           least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode(
+    OpusMSEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Encodes a multistream Opus frame from floating point input.
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+  * @param[in] pcm <tt>const float*</tt>: The input signal as interleaved
+  *                                       samples with a normal range of
+  *                                       +/-1.0.
+  *                                       Samples with a range beyond +/-1.0
+  *                                       are supported but will be clipped by
+  *                                       decoders using the integer API and
+  *                                       should only be used if it is known
+  *                                       that the far end supports extended
+  *                                       dynamic range.
+  *                                       This must contain
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: Number of samples per channel in the input
+  *                                 signal.
+  *                                 This must be an Opus frame size for the
+  *                                 encoder's sampling rate.
+  *                                 For example, at 48 kHz the permitted values
+  *                                 are 120, 240, 480, 960, 1920, and 2880.
+  *                                 Passing in a duration of less than 10 ms
+  *                                 (480 samples at 48 kHz) will prevent the
+  *                                 encoder from using the LPC or hybrid modes.
+  * @param[out] data <tt>unsigned char*</tt>: Output payload.
+  *                                           This must contain storage for at
+  *                                           least \a max_data_bytes.
+  * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
+  *                                                 memory for the output
+  *                                                 payload. This may be
+  *                                                 used to impose an upper limit on
+  *                                                 the instant bitrate, but should
+  *                                                 not be used as the only bitrate
+  *                                                 control. Use #OPUS_SET_BITRATE to
+  *                                                 control the bitrate.
+  * @returns The length of the encoded packet (in bytes) on success or a
+  *          negative error code (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode_float(
+      OpusMSEncoder *st,
+      const float *pcm,
+      int frame_size,
+      unsigned char *data,
+      opus_int32 max_data_bytes
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
+
+/** Frees an <code>OpusMSEncoder</code> allocated by
+  * opus_multistream_encoder_create().
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to be freed.
+  */
+OPUS_EXPORT void opus_multistream_encoder_destroy(OpusMSEncoder *st);
+
+/** Perform a CTL function on a multistream Opus encoder.
+  *
+  * Generally the request and subsequent arguments are generated by a
+  * convenience macro.
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls,
+  *                @ref opus_encoderctls, or @ref opus_multistream_ctls.
+  * @see opus_genericctls
+  * @see opus_encoderctls
+  * @see opus_multistream_ctls
+  */
+OPUS_EXPORT int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/**@}*/
+
+/**\name Multistream decoder functions */
+/**@{*/
+
+/** Gets the size of an <code>OpusMSDecoder</code> structure.
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @returns The size in bytes on success, or a negative error code
+  *          (see @ref opus_errorcodes) on error.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_decoder_get_size(
+      int streams,
+      int coupled_streams
+);
+
+/** Allocates and initializes a multistream decoder state.
+  * Call opus_multistream_decoder_destroy() to release
+  * this object when finished.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels to output.
+  *                               This must be at most 255.
+  *                               It may be different from the number of coded
+  *                               channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    coded channels to output channels, as described in
+  *                    @ref opus_multistream.
+  * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error
+  *                                   code (see @ref opus_errorcodes) on
+  *                                   failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSDecoder *opus_multistream_decoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int *error
+) OPUS_ARG_NONNULL(5);
+
+/** Intialize a previously allocated decoder state object.
+  * The memory pointed to by \a st must be at least the size returned by
+  * opus_multistream_encoder_get_size().
+  * This is intended for applications which use their own allocator instead of
+  * malloc.
+  * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL.
+  * @see opus_multistream_decoder_create
+  * @see opus_multistream_deocder_get_size
+  * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize.
+  * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz).
+  *                                This must be one of 8000, 12000, 16000,
+  *                                24000, or 48000.
+  * @param channels <tt>int</tt>: Number of channels to output.
+  *                               This must be at most 255.
+  *                               It may be different from the number of coded
+  *                               channels (<code>streams +
+  *                               coupled_streams</code>).
+  * @param streams <tt>int</tt>: The total number of streams coded in the
+  *                              input.
+  *                              This must be no more than 255.
+  * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled
+  *                                      (2 channel) streams.
+  *                                      This must be no larger than the total
+  *                                      number of streams.
+  *                                      Additionally, The total number of
+  *                                      coded channels (<code>streams +
+  *                                      coupled_streams</code>) must be no
+  *                                      more than 255.
+  * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from
+  *                    coded channels to output channels, as described in
+  *                    @ref opus_multistream.
+  * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes)
+  *          on failure.
+  */
+OPUS_EXPORT int opus_multistream_decoder_init(
+      OpusMSDecoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6);
+
+/** Decode a multistream Opus packet.
+  * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+  * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+  *                                                Use a <code>NULL</code>
+  *                                                pointer to indicate packet
+  *                                                loss.
+  * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+  * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
+  *                                       samples.
+  *                                       This must contain room for
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: The number of samples per channel of
+  *                                 available space in \a pcm.
+  *                                 If this is less than the maximum packet duration
+  *                                 (120 ms; 5760 for 48kHz), this function will not be capable
+  *                                 of decoding some packets. In the case of PLC (data==NULL)
+  *                                 or FEC (decode_fec=1), then frame_size needs to be exactly
+  *                                 the duration of audio that is missing, otherwise the
+  *                                 decoder will not be in the optimal state to decode the
+  *                                 next incoming packet. For the PLC and FEC cases, frame_size
+  *                                 <b>must</b> be a multiple of 2.5 ms.
+  * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+  *                                 forward error correction data be decoded.
+  *                                 If no such data is available, the frame is
+  *                                 decoded as if it were lost.
+  * @returns Number of samples decoded on success or a negative error code
+  *          (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode(
+    OpusMSDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    opus_int16 *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Decode a multistream Opus packet with floating point output.
+  * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+  * @param[in] data <tt>const unsigned char*</tt>: Input payload.
+  *                                                Use a <code>NULL</code>
+  *                                                pointer to indicate packet
+  *                                                loss.
+  * @param len <tt>opus_int32</tt>: Number of bytes in payload.
+  * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved
+  *                                       samples.
+  *                                       This must contain room for
+  *                                       <code>frame_size*channels</code>
+  *                                       samples.
+  * @param frame_size <tt>int</tt>: The number of samples per channel of
+  *                                 available space in \a pcm.
+  *                                 If this is less than the maximum packet duration
+  *                                 (120 ms; 5760 for 48kHz), this function will not be capable
+  *                                 of decoding some packets. In the case of PLC (data==NULL)
+  *                                 or FEC (decode_fec=1), then frame_size needs to be exactly
+  *                                 the duration of audio that is missing, otherwise the
+  *                                 decoder will not be in the optimal state to decode the
+  *                                 next incoming packet. For the PLC and FEC cases, frame_size
+  *                                 <b>must</b> be a multiple of 2.5 ms.
+  * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band
+  *                                 forward error correction data be decoded.
+  *                                 If no such data is available, the frame is
+  *                                 decoded as if it were lost.
+  * @returns Number of samples decoded on success or a negative error code
+  *          (see @ref opus_errorcodes) on failure.
+  */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode_float(
+    OpusMSDecoder *st,
+    const unsigned char *data,
+    opus_int32 len,
+    float *pcm,
+    int frame_size,
+    int decode_fec
+) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
+
+/** Perform a CTL function on a multistream Opus decoder.
+  *
+  * Generally the request and subsequent arguments are generated by a
+  * convenience macro.
+  * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state.
+  * @param request This and all remaining parameters should be replaced by one
+  *                of the convenience macros in @ref opus_genericctls,
+  *                @ref opus_decoderctls, or @ref opus_multistream_ctls.
+  * @see opus_genericctls
+  * @see opus_decoderctls
+  * @see opus_multistream_ctls
+  */
+OPUS_EXPORT int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) OPUS_ARG_NONNULL(1);
+
+/** Frees an <code>OpusMSDecoder</code> allocated by
+  * opus_multistream_decoder_create().
+  * @param st <tt>OpusMSDecoder</tt>: Multistream decoder state to be freed.
+  */
+OPUS_EXPORT void opus_multistream_decoder_destroy(OpusMSDecoder *st);
+
+/**@}*/
+
+/**@}*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPUS_MULTISTREAM_H */
diff --git a/drivers/opus/opus_multistream_decoder.c b/drivers/opus/opus_multistream_decoder.c
new file mode 100644
index 0000000000..64a0c24067
--- /dev/null
+++ b/drivers/opus/opus_multistream_decoder.c
@@ -0,0 +1,537 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+
+struct OpusMSDecoder {
+   ChannelLayout layout;
+   /* Decoder states go here */
+};
+
+
+
+
+/* DECODER */
+
+opus_int32 opus_multistream_decoder_get_size(int nb_streams, int nb_coupled_streams)
+{
+   int coupled_size;
+   int mono_size;
+
+   if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0;
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+   return align(sizeof(OpusMSDecoder))
+         + nb_coupled_streams * align(coupled_size)
+         + (nb_streams-nb_coupled_streams) * align(mono_size);
+}
+
+int opus_multistream_decoder_init(
+      OpusMSDecoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping
+)
+{
+   int coupled_size;
+   int mono_size;
+   int i, ret;
+   char *ptr;
+
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+      return OPUS_BAD_ARG;
+
+   st->layout.nb_channels = channels;
+   st->layout.nb_streams = streams;
+   st->layout.nb_coupled_streams = coupled_streams;
+
+   for (i=0;i<st->layout.nb_channels;i++)
+      st->layout.mapping[i] = mapping[i];
+   if (!validate_layout(&st->layout))
+      return OPUS_BAD_ARG;
+
+   ptr = (char*)st + align(sizeof(OpusMSDecoder));
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+
+   for (i=0;i<st->layout.nb_coupled_streams;i++)
+   {
+      ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 2);
+      if(ret!=OPUS_OK)return ret;
+      ptr += align(coupled_size);
+   }
+   for (;i<st->layout.nb_streams;i++)
+   {
+      ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 1);
+      if(ret!=OPUS_OK)return ret;
+      ptr += align(mono_size);
+   }
+   return OPUS_OK;
+}
+
+
+OpusMSDecoder *opus_multistream_decoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int *error
+)
+{
+   int ret;
+   OpusMSDecoder *st;
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusMSDecoder *)opus_alloc(opus_multistream_decoder_get_size(streams, coupled_streams));
+   if (st==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_multistream_decoder_init(st, Fs, channels, streams, coupled_streams, mapping);
+   if (error)
+      *error = ret;
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   return st;
+}
+
+typedef void (*opus_copy_channel_out_func)(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size
+);
+
+static int opus_multistream_packet_validate(const unsigned char *data,
+      opus_int32 len, int nb_streams, opus_int32 Fs)
+{
+   int s;
+   int count;
+   unsigned char toc;
+   opus_int16 size[48];
+   int samples=0;
+   opus_int32 packet_offset;
+
+   for (s=0;s<nb_streams;s++)
+   {
+      int tmp_samples;
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      count = opus_packet_parse_impl(data, len, s!=nb_streams-1, &toc, NULL,
+                                     size, NULL, &packet_offset);
+      if (count<0)
+         return count;
+      tmp_samples = opus_packet_get_nb_samples(data, packet_offset, Fs);
+      if (s!=0 && samples != tmp_samples)
+         return OPUS_INVALID_PACKET;
+      samples = tmp_samples;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return samples;
+}
+
+static int opus_multistream_decode_native(
+      OpusMSDecoder *st,
+      const unsigned char *data,
+      opus_int32 len,
+      void *pcm,
+      opus_copy_channel_out_func copy_channel_out,
+      int frame_size,
+      int decode_fec,
+      int soft_clip
+)
+{
+   opus_int32 Fs;
+   int coupled_size;
+   int mono_size;
+   int s, c;
+   char *ptr;
+   int do_plc=0;
+   VARDECL(opus_val16, buf);
+   ALLOC_STACK;
+
+   /* Limit frame_size to avoid excessive stack allocations. */
+   opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs));
+   frame_size = IMIN(frame_size, Fs/25*3);
+   ALLOC(buf, 2*frame_size, opus_val16);
+   ptr = (char*)st + align(sizeof(OpusMSDecoder));
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+
+   if (len==0)
+      do_plc = 1;
+   if (len < 0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   if (!do_plc && len < 2*st->layout.nb_streams-1)
+   {
+      RESTORE_STACK;
+      return OPUS_INVALID_PACKET;
+   }
+   if (!do_plc)
+   {
+      int ret = opus_multistream_packet_validate(data, len, st->layout.nb_streams, Fs);
+      if (ret < 0)
+      {
+         RESTORE_STACK;
+         return ret;
+      } else if (ret > frame_size)
+      {
+         RESTORE_STACK;
+         return OPUS_BUFFER_TOO_SMALL;
+      }
+   }
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusDecoder *dec;
+      int packet_offset, ret;
+
+      dec = (OpusDecoder*)ptr;
+      ptr += (s < st->layout.nb_coupled_streams) ? align(coupled_size) : align(mono_size);
+
+      if (!do_plc && len<=0)
+      {
+         RESTORE_STACK;
+         return OPUS_INTERNAL_ERROR;
+      }
+      packet_offset = 0;
+      ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip);
+      data += packet_offset;
+      len -= packet_offset;
+      if (ret <= 0)
+      {
+         RESTORE_STACK;
+         return ret;
+      }
+      frame_size = ret;
+      if (s < st->layout.nb_coupled_streams)
+      {
+         int chan, prev;
+         prev = -1;
+         /* Copy "left" audio to the channel(s) where it belongs */
+         while ( (chan = get_left_channel(&st->layout, s, prev)) != -1)
+         {
+            (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+               buf, 2, frame_size);
+            prev = chan;
+         }
+         prev = -1;
+         /* Copy "right" audio to the channel(s) where it belongs */
+         while ( (chan = get_right_channel(&st->layout, s, prev)) != -1)
+         {
+            (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+               buf+1, 2, frame_size);
+            prev = chan;
+         }
+      } else {
+         int chan, prev;
+         prev = -1;
+         /* Copy audio to the channel(s) where it belongs */
+         while ( (chan = get_mono_channel(&st->layout, s, prev)) != -1)
+         {
+            (*copy_channel_out)(pcm, st->layout.nb_channels, chan,
+               buf, 1, frame_size);
+            prev = chan;
+         }
+      }
+   }
+   /* Handle muted channels */
+   for (c=0;c<st->layout.nb_channels;c++)
+   {
+      if (st->layout.mapping[c] == 255)
+      {
+         (*copy_channel_out)(pcm, st->layout.nb_channels, c,
+            NULL, 0, frame_size);
+      }
+   }
+   RESTORE_STACK;
+   return frame_size;
+}
+
+#if !defined(DISABLE_FLOAT_API)
+static void opus_copy_channel_out_float(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size
+)
+{
+   float *float_dst;
+   opus_int32 i;
+   float_dst = (float*)dst;
+   if (src != NULL)
+   {
+      for (i=0;i<frame_size;i++)
+#if defined(OPUS_FIXED_POINT)
+         float_dst[i*dst_stride+dst_channel] = (1/32768.f)*src[i*src_stride];
+#else
+         float_dst[i*dst_stride+dst_channel] = src[i*src_stride];
+#endif
+   }
+   else
+   {
+      for (i=0;i<frame_size;i++)
+         float_dst[i*dst_stride+dst_channel] = 0;
+   }
+}
+#endif
+
+static void opus_copy_channel_out_short(
+  void *dst,
+  int dst_stride,
+  int dst_channel,
+  const opus_val16 *src,
+  int src_stride,
+  int frame_size
+)
+{
+   opus_int16 *short_dst;
+   opus_int32 i;
+   short_dst = (opus_int16*)dst;
+   if (src != NULL)
+   {
+      for (i=0;i<frame_size;i++)
+#if defined(OPUS_FIXED_POINT)
+         short_dst[i*dst_stride+dst_channel] = src[i*src_stride];
+#else
+         short_dst[i*dst_stride+dst_channel] = FLOAT2INT16(src[i*src_stride]);
+#endif
+   }
+   else
+   {
+      for (i=0;i<frame_size;i++)
+         short_dst[i*dst_stride+dst_channel] = 0;
+   }
+}
+
+
+
+#ifdef OPUS_FIXED_POINT
+int opus_multistream_decode(
+      OpusMSDecoder *st,
+      const unsigned char *data,
+      opus_int32 len,
+      opus_int16 *pcm,
+      int frame_size,
+      int decode_fec
+)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_multistream_decode_float(OpusMSDecoder *st, const unsigned char *data,
+      opus_int32 len, float *pcm, int frame_size, int decode_fec)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+}
+#endif
+
+#else
+
+int opus_multistream_decode(OpusMSDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1);
+}
+
+int opus_multistream_decode_float(
+      OpusMSDecoder *st,
+      const unsigned char *data,
+      opus_int32 len,
+      float *pcm,
+      int frame_size,
+      int decode_fec
+)
+{
+   return opus_multistream_decode_native(st, data, len,
+       pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0);
+}
+#endif
+
+int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...)
+{
+   va_list ap;
+   int coupled_size, mono_size;
+   char *ptr;
+   int ret = OPUS_OK;
+
+   va_start(ap, request);
+
+   coupled_size = opus_decoder_get_size(2);
+   mono_size = opus_decoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSDecoder));
+   switch (request)
+   {
+       case OPUS_GET_BANDWIDTH_REQUEST:
+       case OPUS_GET_SAMPLE_RATE_REQUEST:
+       case OPUS_GET_GAIN_REQUEST:
+       case OPUS_GET_LAST_PACKET_DURATION_REQUEST:
+       {
+          OpusDecoder *dec;
+          /* For int32* GET params, just query the first stream */
+          opus_int32 *value = va_arg(ap, opus_int32*);
+          dec = (OpusDecoder*)ptr;
+          ret = opus_decoder_ctl(dec, request, value);
+       }
+       break;
+       case OPUS_GET_FINAL_RANGE_REQUEST:
+       {
+          int s;
+          opus_uint32 *value = va_arg(ap, opus_uint32*);
+          opus_uint32 tmp;
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          *value = 0;
+          for (s=0;s<st->layout.nb_streams;s++)
+          {
+             OpusDecoder *dec;
+             dec = (OpusDecoder*)ptr;
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+             ret = opus_decoder_ctl(dec, request, &tmp);
+             if (ret != OPUS_OK) break;
+             *value ^= tmp;
+          }
+       }
+       break;
+       case OPUS_RESET_STATE:
+       {
+          int s;
+          for (s=0;s<st->layout.nb_streams;s++)
+          {
+             OpusDecoder *dec;
+
+             dec = (OpusDecoder*)ptr;
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+             ret = opus_decoder_ctl(dec, OPUS_RESET_STATE);
+             if (ret != OPUS_OK)
+                break;
+          }
+       }
+       break;
+       case OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST:
+       {
+          int s;
+          opus_int32 stream_id;
+          OpusDecoder **value;
+          stream_id = va_arg(ap, opus_int32);
+          if (stream_id<0 || stream_id >= st->layout.nb_streams)
+             ret = OPUS_BAD_ARG;
+          value = va_arg(ap, OpusDecoder**);
+          if (!value)
+          {
+             goto bad_arg;
+          }
+          for (s=0;s<stream_id;s++)
+          {
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+          }
+          *value = (OpusDecoder*)ptr;
+       }
+       break;
+       case OPUS_SET_GAIN_REQUEST:
+       {
+          int s;
+          /* This works for int32 params */
+          opus_int32 value = va_arg(ap, opus_int32);
+          for (s=0;s<st->layout.nb_streams;s++)
+          {
+             OpusDecoder *dec;
+
+             dec = (OpusDecoder*)ptr;
+             if (s < st->layout.nb_coupled_streams)
+                ptr += align(coupled_size);
+             else
+                ptr += align(mono_size);
+             ret = opus_decoder_ctl(dec, request, value);
+             if (ret != OPUS_OK)
+                break;
+          }
+       }
+       break;
+       default:
+          ret = OPUS_UNIMPLEMENTED;
+       break;
+   }
+
+   va_end(ap);
+   return ret;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+}
+
+
+void opus_multistream_decoder_destroy(OpusMSDecoder *st)
+{
+    opus_free(st);
+}
diff --git a/drivers/opus/opus_multistream_encoder.c b/drivers/opus/opus_multistream_encoder.c
new file mode 100644
index 0000000000..8d559743ea
--- /dev/null
+++ b/drivers/opus/opus_multistream_encoder.c
@@ -0,0 +1,1174 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus_multistream.h"
+#include "opus.h"
+#include "opus_private.h"
+#include "stack_alloc.h"
+#include <stdarg.h>
+#include "float_cast.h"
+#include "os_support.h"
+#include "mathops.h"
+#include "mdct.h"
+#include "opus_modes.h"
+#include "bands.h"
+#include "quant_bands.h"
+
+typedef struct {
+   int nb_streams;
+   int nb_coupled_streams;
+   unsigned char mapping[8];
+} VorbisLayout;
+
+/* Index is nb_channel-1*/
+static const VorbisLayout vorbis_mappings[8] = {
+      {1, 0, {0}},                      /* 1: mono */
+      {1, 1, {0, 1}},                   /* 2: stereo */
+      {2, 1, {0, 2, 1}},                /* 3: 1-d surround */
+      {2, 2, {0, 1, 2, 3}},             /* 4: quadraphonic surround */
+      {3, 2, {0, 4, 1, 2, 3}},          /* 5: 5-channel surround */
+      {4, 2, {0, 4, 1, 2, 3, 5}},       /* 6: 5.1 surround */
+      {4, 3, {0, 4, 1, 2, 3, 5, 6}},    /* 7: 6.1 surround */
+      {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */
+};
+
+typedef void (*opus_copy_channel_in_func)(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+);
+
+struct OpusMSEncoder {
+   ChannelLayout layout;
+   int lfe_stream;
+   int application;
+   int variable_duration;
+   int surround;
+   opus_int32 bitrate_bps;
+   float subframe_mem[3];
+   /* Encoder states go here */
+   /* then opus_val32 window_mem[channels*120]; */
+   /* then opus_val32 preemph_mem[channels]; */
+};
+
+static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st)
+{
+   int s;
+   char *ptr;
+   int coupled_size, mono_size;
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+   }
+   return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32));
+}
+
+static opus_val32 *ms_get_window_mem(OpusMSEncoder *st)
+{
+   int s;
+   char *ptr;
+   int coupled_size, mono_size;
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+   }
+   return (opus_val32*)ptr;
+}
+
+static int validate_encoder_layout(const ChannelLayout *layout)
+{
+   int s;
+   for (s=0;s<layout->nb_streams;s++)
+   {
+      if (s < layout->nb_coupled_streams)
+      {
+         if (get_left_channel(layout, s, -1)==-1)
+            return 0;
+         if (get_right_channel(layout, s, -1)==-1)
+            return 0;
+      } else {
+         if (get_mono_channel(layout, s, -1)==-1)
+            return 0;
+      }
+   }
+   return 1;
+}
+
+static void channel_pos(int channels, int pos[8])
+{
+   /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */
+   if (channels==4)
+   {
+      pos[0]=1;
+      pos[1]=3;
+      pos[2]=1;
+      pos[3]=3;
+   } else if (channels==3||channels==5||channels==6)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=0;
+   } else if (channels==7)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=2;
+      pos[6]=0;
+   } else if (channels==8)
+   {
+      pos[0]=1;
+      pos[1]=2;
+      pos[2]=3;
+      pos[3]=1;
+      pos[4]=3;
+      pos[5]=1;
+      pos[6]=3;
+      pos[7]=0;
+   }
+}
+
+#if 1
+/* Computes a rough approximation of log2(2^a + 2^b) */
+static opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+   opus_val16 max;
+   opus_val32 diff;
+   opus_val16 frac;
+   static const opus_val16 diff_table[17] = {
+         QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT),
+         QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT),
+         QCONST16(0.0028123f, DB_SHIFT)
+   };
+   int low;
+   if (a>b)
+   {
+      max = a;
+      diff = SUB32(EXTEND32(a),EXTEND32(b));
+   } else {
+      max = b;
+      diff = SUB32(EXTEND32(b),EXTEND32(a));
+   }
+   if (diff >= QCONST16(8.f, DB_SHIFT))
+      return max;
+#ifdef OPUS_FIXED_POINT
+   low = SHR32(diff, DB_SHIFT-1);
+   frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT);
+#else
+   low = (int)floor(2*diff);
+   frac = 2*diff - low;
+#endif
+   return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low]));
+}
+#else
+opus_val16 logSum(opus_val16 a, opus_val16 b)
+{
+   return log2(pow(4, a)+ pow(4, b))/2;
+}
+#endif
+
+void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+)
+{
+   int c;
+   int i;
+   int LM;
+   int pos[8] = {0};
+   int upsample;
+   int frame_size;
+   opus_val16 channel_offset;
+   opus_val32 bandE[21];
+   opus_val16 maskLogE[3][21];
+   VARDECL(opus_val32, in);
+   VARDECL(opus_val16, x);
+   VARDECL(opus_val32, freq);
+   SAVE_STACK;
+
+   upsample = resampling_factor(rate);
+   frame_size = len*upsample;
+
+   for (LM=0;LM<celt_mode->maxLM;LM++)
+      if (celt_mode->shortMdctSize<<LM==frame_size)
+         break;
+
+   ALLOC(in, frame_size+overlap, opus_val32);
+   ALLOC(x, len, opus_val16);
+   ALLOC(freq, frame_size, opus_val32);
+
+   channel_pos(channels, pos);
+
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT);
+
+   for (c=0;c<channels;c++)
+   {
+      OPUS_COPY(in, mem+c*overlap, overlap);
+      (*copy_channel_in)(x, 1, pcm, channels, c, len);
+      celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+      if (upsample != 1)
+      {
+         int bound = len;
+         for (i=0;i<bound;i++)
+            freq[i] *= upsample;
+         for (;i<frame_size;i++)
+            freq[i] = 0;
+      }
+
+      compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM);
+      amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1);
+      /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */
+      for (i=1;i<21;i++)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT));
+      for (i=19;i>=0;i--)
+         bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT));
+      if (pos[c]==1)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]);
+      } else if (pos[c]==3)
+      {
+         for (i=0;i<21;i++)
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]);
+      } else if (pos[c]==2)
+      {
+         for (i=0;i<21;i++)
+         {
+            maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+            maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT));
+         }
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/21);
+#endif
+      OPUS_COPY(mem+c*overlap, in+frame_size, overlap);
+   }
+   for (i=0;i<21;i++)
+      maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]);
+   channel_offset = HALF16(celt_log2(QCONST32(2.f,14)/(channels-1)));
+   for (c=0;c<3;c++)
+      for (i=0;i<21;i++)
+         maskLogE[c][i] += channel_offset;
+#if 0
+   for (c=0;c<3;c++)
+   {
+      for (i=0;i<21;i++)
+         printf("%f ", maskLogE[c][i]);
+   }
+#endif
+   for (c=0;c<channels;c++)
+   {
+      opus_val16 *mask;
+      if (pos[c]!=0)
+      {
+         mask = &maskLogE[pos[c]-1][0];
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i];
+      } else {
+         for (i=0;i<21;i++)
+            bandLogE[21*c+i] = 0;
+      }
+#if 0
+      for (i=0;i<21;i++)
+         printf("%f ", bandLogE[21*c+i]);
+      printf("\n");
+#endif
+#if 0
+      float sum=0;
+      for (i=0;i<21;i++)
+         sum += bandLogE[21*c+i];
+      printf("%f ", sum/(float)QCONST32(21.f, DB_SHIFT));
+      printf("\n");
+#endif
+   }
+   RESTORE_STACK;
+}
+
+opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams)
+{
+   int coupled_size;
+   int mono_size;
+
+   if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0;
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   return align(sizeof(OpusMSEncoder))
+        + nb_coupled_streams * align(coupled_size)
+        + (nb_streams-nb_coupled_streams) * align(mono_size);
+}
+
+opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_family)
+{
+   int nb_streams;
+   int nb_coupled_streams;
+   opus_int32 size;
+
+   if (mapping_family==0)
+   {
+      if (channels==1)
+      {
+         nb_streams=1;
+         nb_coupled_streams=0;
+      } else if (channels==2)
+      {
+         nb_streams=1;
+         nb_coupled_streams=1;
+      } else
+         return 0;
+   } else if (mapping_family==1 && channels<=8 && channels>=1)
+   {
+      nb_streams=vorbis_mappings[channels-1].nb_streams;
+      nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
+   } else if (mapping_family==255)
+   {
+      nb_streams=channels;
+      nb_coupled_streams=0;
+   } else
+      return 0;
+   size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
+   if (channels>2)
+   {
+      size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32));
+   }
+   return size;
+}
+
+
+static int opus_multistream_encoder_init_impl(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int surround
+)
+{
+   int coupled_size;
+   int mono_size;
+   int i, ret;
+   char *ptr;
+
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+      return OPUS_BAD_ARG;
+
+   st->layout.nb_channels = channels;
+   st->layout.nb_streams = streams;
+   st->layout.nb_coupled_streams = coupled_streams;
+   st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0;
+   if (!surround)
+      st->lfe_stream = -1;
+   st->bitrate_bps = OPUS_AUTO;
+   st->application = application;
+   st->variable_duration = OPUS_FRAMESIZE_ARG;
+   for (i=0;i<st->layout.nb_channels;i++)
+      st->layout.mapping[i] = mapping[i];
+   if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
+      return OPUS_BAD_ARG;
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+
+   for (i=0;i<st->layout.nb_coupled_streams;i++)
+   {
+      ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application);
+      if(ret!=OPUS_OK)return ret;
+      if (i==st->lfe_stream)
+         opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
+      ptr += align(coupled_size);
+   }
+   for (;i<st->layout.nb_streams;i++)
+   {
+      ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application);
+      if (i==st->lfe_stream)
+         opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
+      if(ret!=OPUS_OK)return ret;
+      ptr += align(mono_size);
+   }
+   if (surround)
+   {
+      OPUS_CLEAR(ms_get_preemph_mem(st), channels);
+      OPUS_CLEAR(ms_get_window_mem(st), channels*120);
+   }
+   st->surround = surround;
+   return OPUS_OK;
+}
+
+int opus_multistream_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application
+)
+{
+   return opus_multistream_encoder_init_impl(st, Fs, channels, streams, coupled_streams, mapping, application, 0);
+}
+
+int opus_multistream_surround_encoder_init(
+      OpusMSEncoder *st,
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application
+)
+{
+   if ((channels>255) || (channels<1))
+      return OPUS_BAD_ARG;
+   st->lfe_stream = -1;
+   if (mapping_family==0)
+   {
+      if (channels==1)
+      {
+         *streams=1;
+         *coupled_streams=0;
+         mapping[0]=0;
+      } else if (channels==2)
+      {
+         *streams=1;
+         *coupled_streams=1;
+         mapping[0]=0;
+         mapping[1]=1;
+      } else
+         return OPUS_UNIMPLEMENTED;
+   } else if (mapping_family==1 && channels<=8 && channels>=1)
+   {
+      int i;
+      *streams=vorbis_mappings[channels-1].nb_streams;
+      *coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams;
+      for (i=0;i<channels;i++)
+         mapping[i] = vorbis_mappings[channels-1].mapping[i];
+      if (channels>=6)
+         st->lfe_stream = *streams-1;
+   } else if (mapping_family==255)
+   {
+      int i;
+      *streams=channels;
+      *coupled_streams=0;
+      for(i=0;i<channels;i++)
+         mapping[i] = i;
+   } else
+      return OPUS_UNIMPLEMENTED;
+   return opus_multistream_encoder_init_impl(st, Fs, channels, *streams, *coupled_streams,
+         mapping, application, channels>2&&mapping_family==1);
+}
+
+OpusMSEncoder *opus_multistream_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int streams,
+      int coupled_streams,
+      const unsigned char *mapping,
+      int application,
+      int *error
+)
+{
+   int ret;
+   OpusMSEncoder *st;
+   if ((channels>255) || (channels<1) || (coupled_streams>streams) ||
+       (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusMSEncoder *)opus_alloc(opus_multistream_encoder_get_size(streams, coupled_streams));
+   if (st==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_multistream_encoder_init(st, Fs, channels, streams, coupled_streams, mapping, application);
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+
+OpusMSEncoder *opus_multistream_surround_encoder_create(
+      opus_int32 Fs,
+      int channels,
+      int mapping_family,
+      int *streams,
+      int *coupled_streams,
+      unsigned char *mapping,
+      int application,
+      int *error
+)
+{
+   int ret;
+   OpusMSEncoder *st;
+   if ((channels>255) || (channels<1))
+   {
+      if (error)
+         *error = OPUS_BAD_ARG;
+      return NULL;
+   }
+   st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family));
+   if (st==NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   ret = opus_multistream_surround_encoder_init(st, Fs, channels, mapping_family, streams, coupled_streams, mapping, application);
+   if (ret != OPUS_OK)
+   {
+      opus_free(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+
+static void surround_rate_allocation(
+      OpusMSEncoder *st,
+      opus_int32 *rate,
+      int frame_size
+      )
+{
+   int i;
+   opus_int32 channel_rate;
+   opus_int32 Fs;
+   char *ptr;
+   int stream_offset;
+   int lfe_offset;
+   int coupled_ratio; /* Q8 */
+   int lfe_ratio;     /* Q8 */
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+
+   if (st->bitrate_bps > st->layout.nb_channels*40000)
+      stream_offset = 20000;
+   else
+      stream_offset = st->bitrate_bps/st->layout.nb_channels/2;
+   stream_offset += 60*(Fs/frame_size-50);
+   /* We start by giving each stream (coupled or uncoupled) the same bitrate.
+      This models the main saving of coupled channels over uncoupled. */
+   /* The LFE stream is an exception to the above and gets fewer bits. */
+   lfe_offset = 3500 + 60*(Fs/frame_size-50);
+   /* Coupled streams get twice the mono rate after the first 20 kb/s. */
+   coupled_ratio = 512;
+   /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */
+   lfe_ratio = 32;
+
+   /* Compute bitrate allocation between streams */
+   if (st->bitrate_bps==OPUS_AUTO)
+   {
+      channel_rate = Fs+60*Fs/frame_size;
+   } else if (st->bitrate_bps==OPUS_BITRATE_MAX)
+   {
+      channel_rate = 300000;
+   } else {
+      int nb_lfe;
+      int nb_uncoupled;
+      int nb_coupled;
+      int total;
+      nb_lfe = (st->lfe_stream!=-1);
+      nb_coupled = st->layout.nb_coupled_streams;
+      nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe;
+      total = (nb_uncoupled<<8)         /* mono */
+            + coupled_ratio*nb_coupled /* stereo */
+            + nb_lfe*lfe_ratio;
+      channel_rate = 256*(st->bitrate_bps-lfe_offset*nb_lfe-stream_offset*(nb_coupled+nb_uncoupled))/total;
+   }
+#ifndef OPUS_FIXED_POINT
+   if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
+   {
+      opus_int32 bonus;
+      bonus = 60*(Fs/frame_size-50);
+      channel_rate += bonus;
+   }
+#endif
+
+   for (i=0;i<st->layout.nb_streams;i++)
+   {
+      if (i<st->layout.nb_coupled_streams)
+         rate[i] = stream_offset+(channel_rate*coupled_ratio>>8);
+      else if (i!=st->lfe_stream)
+         rate[i] = stream_offset+channel_rate;
+      else
+         rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8);
+   }
+}
+
+/* Max size in case the encoder decides to return three frames */
+#define MS_FRAME_TMP (3*1275+7)
+static int opus_multistream_encode_native
+(
+    OpusMSEncoder *st,
+    opus_copy_channel_in_func copy_channel_in,
+    const void *pcm,
+    int analysis_frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes,
+    int lsb_depth,
+    downmix_func downmix
+)
+{
+   opus_int32 Fs;
+   int coupled_size;
+   int mono_size;
+   int s;
+   char *ptr;
+   int tot_size;
+   VARDECL(opus_val16, buf);
+   VARDECL(opus_val16, bandSMR);
+   unsigned char tmp_data[MS_FRAME_TMP];
+   OpusRepacketizer rp;
+   opus_int32 vbr;
+   const CELTMode *celt_mode;
+   opus_int32 bitrates[256];
+   opus_val16 bandLogE[42];
+   opus_val32 *mem = NULL;
+   opus_val32 *preemph_mem=NULL;
+   int frame_size;
+   ALLOC_STACK;
+
+   if (st->surround)
+   {
+      preemph_mem = ms_get_preemph_mem(st);
+      mem = ms_get_window_mem(st);
+   }
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr));
+   opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
+
+   {
+      opus_int32 delay_compensation;
+      int channels;
+
+      channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
+      opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
+      delay_compensation -= Fs/400;
+      frame_size = compute_frame_size(pcm, analysis_frame_size,
+            st->variable_duration, channels, Fs, st->bitrate_bps,
+            delay_compensation, downmix
+#ifndef DISABLE_FLOAT_API
+            , st->subframe_mem
+#endif
+            );
+   }
+
+   if (400*frame_size < Fs)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   /* Validate frame_size before using it to allocate stack space.
+      This mirrors the checks in opus_encode[_float](). */
+   if (400*frame_size != Fs && 200*frame_size != Fs &&
+       100*frame_size != Fs &&  50*frame_size != Fs &&
+        25*frame_size != Fs &&  50*frame_size != 3*Fs)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   ALLOC(buf, 2*frame_size, opus_val16);
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+
+   ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
+   if (st->surround)
+   {
+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
+   }
+
+   if (max_data_bytes < 4*st->layout.nb_streams-1)
+   {
+      RESTORE_STACK;
+      return OPUS_BUFFER_TOO_SMALL;
+   }
+
+   /* Compute bitrate allocation between streams (this could be a lot better) */
+   surround_rate_allocation(st, bitrates, frame_size);
+
+   if (!vbr)
+      max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size));
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusEncoder *enc;
+      enc = (OpusEncoder*)ptr;
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+      opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
+      if (st->surround)
+      {
+         opus_int32 equiv_rate;
+         equiv_rate = st->bitrate_bps;
+         if (frame_size*50 < Fs)
+            equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels;
+         if (equiv_rate > 10000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+         else if (equiv_rate > 7000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND));
+         else if (equiv_rate > 5000*st->layout.nb_channels)
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND));
+         else
+            opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND));
+         if (s < st->layout.nb_coupled_streams)
+         {
+            /* To preserve the spatial image, force stereo CELT on coupled streams */
+            opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+            opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
+         }
+      }
+   }
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   /* Counting ToC */
+   tot_size = 0;
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusEncoder *enc;
+      int len;
+      int curr_max;
+      int c1, c2;
+
+      opus_repacketizer_init(&rp);
+      enc = (OpusEncoder*)ptr;
+      if (s < st->layout.nb_coupled_streams)
+      {
+         int i;
+         int left, right;
+         left = get_left_channel(&st->layout, s, -1);
+         right = get_right_channel(&st->layout, s, -1);
+         (*copy_channel_in)(buf, 2,
+            pcm, st->layout.nb_channels, left, frame_size);
+         (*copy_channel_in)(buf+1, 2,
+            pcm, st->layout.nb_channels, right, frame_size);
+         ptr += align(coupled_size);
+         if (st->surround)
+         {
+            for (i=0;i<21;i++)
+            {
+               bandLogE[i] = bandSMR[21*left+i];
+               bandLogE[21+i] = bandSMR[21*right+i];
+            }
+         }
+         c1 = left;
+         c2 = right;
+      } else {
+         int i;
+         int chan = get_mono_channel(&st->layout, s, -1);
+         (*copy_channel_in)(buf, 1,
+            pcm, st->layout.nb_channels, chan, frame_size);
+         ptr += align(mono_size);
+         if (st->surround)
+         {
+            for (i=0;i<21;i++)
+               bandLogE[i] = bandSMR[21*chan+i];
+         }
+         c1 = chan;
+         c2 = -1;
+      }
+      if (st->surround)
+         opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
+      /* number of bytes left (+Toc) */
+      curr_max = max_data_bytes - tot_size;
+      /* Reserve three bytes for the last stream and four for the others */
+      curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
+      curr_max = IMIN(curr_max,MS_FRAME_TMP);
+      if (!vbr && s == st->layout.nb_streams-1)
+         opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size)));
+      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth,
+            pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix);
+      if (len<0)
+      {
+         RESTORE_STACK;
+         return len;
+      }
+      /* We need to use the repacketizer to add the self-delimiting lengths
+         while taking into account the fact that the encoder can now return
+         more than one frame at a time (e.g. 60 ms CELT-only) */
+      opus_repacketizer_cat(&rp, tmp_data, len);
+      len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp),
+            data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1);
+      data += len;
+      tot_size += len;
+   }
+   /*printf("\n");*/
+   RESTORE_STACK;
+   return tot_size;
+}
+
+#if !defined(DISABLE_FLOAT_API)
+static void opus_copy_channel_in_float(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+)
+{
+   const float *float_src;
+   opus_int32 i;
+   float_src = (const float *)src;
+   for (i=0;i<frame_size;i++)
+#if defined(OPUS_FIXED_POINT)
+      dst[i*dst_stride] = FLOAT2INT16(float_src[i*src_stride+src_channel]);
+#else
+      dst[i*dst_stride] = float_src[i*src_stride+src_channel];
+#endif
+}
+#endif
+
+static void opus_copy_channel_in_short(
+  opus_val16 *dst,
+  int dst_stride,
+  const void *src,
+  int src_stride,
+  int src_channel,
+  int frame_size
+)
+{
+   const opus_int16 *short_src;
+   opus_int32 i;
+   short_src = (const opus_int16 *)src;
+   for (i=0;i<frame_size;i++)
+#if defined(OPUS_FIXED_POINT)
+      dst[i*dst_stride] = short_src[i*src_stride+src_channel];
+#else
+      dst[i*dst_stride] = (1/32768.f)*short_src[i*src_stride+src_channel];
+#endif
+}
+
+
+#ifdef OPUS_FIXED_POINT
+int opus_multistream_encode(
+    OpusMSEncoder *st,
+    const opus_val16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_short,
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+}
+
+#ifndef DISABLE_FLOAT_API
+int opus_multistream_encode_float(
+    OpusMSEncoder *st,
+    const float *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_float,
+      pcm, frame_size, data, max_data_bytes, 16, downmix_float);
+}
+#endif
+
+#else
+
+int opus_multistream_encode_float
+(
+    OpusMSEncoder *st,
+    const opus_val16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_float,
+      pcm, frame_size, data, max_data_bytes, 24, downmix_float);
+}
+
+int opus_multistream_encode(
+    OpusMSEncoder *st,
+    const opus_int16 *pcm,
+    int frame_size,
+    unsigned char *data,
+    opus_int32 max_data_bytes
+)
+{
+   return opus_multistream_encode_native(st, opus_copy_channel_in_short,
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int);
+}
+#endif
+
+int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
+{
+   va_list ap;
+   int coupled_size, mono_size;
+   char *ptr;
+   int ret = OPUS_OK;
+
+   va_start(ap, request);
+
+   coupled_size = opus_encoder_get_size(2);
+   mono_size = opus_encoder_get_size(1);
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   switch (request)
+   {
+   case OPUS_SET_BITRATE_REQUEST:
+   {
+      opus_int32 value = va_arg(ap, opus_int32);
+      if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX)
+      {
+         goto bad_arg;
+      }
+      st->bitrate_bps = value;
+   }
+   break;
+   case OPUS_GET_BITRATE_REQUEST:
+   {
+      int s;
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value = 0;
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         opus_int32 rate;
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         opus_encoder_ctl(enc, request, &rate);
+         *value += rate;
+      }
+   }
+   break;
+   case OPUS_GET_LSB_DEPTH_REQUEST:
+   case OPUS_GET_VBR_REQUEST:
+   case OPUS_GET_APPLICATION_REQUEST:
+   case OPUS_GET_BANDWIDTH_REQUEST:
+   case OPUS_GET_COMPLEXITY_REQUEST:
+   case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
+   case OPUS_GET_DTX_REQUEST:
+   case OPUS_GET_VOICE_RATIO_REQUEST:
+   case OPUS_GET_VBR_CONSTRAINT_REQUEST:
+   case OPUS_GET_SIGNAL_REQUEST:
+   case OPUS_GET_LOOKAHEAD_REQUEST:
+   case OPUS_GET_SAMPLE_RATE_REQUEST:
+   case OPUS_GET_INBAND_FEC_REQUEST:
+   case OPUS_GET_FORCE_CHANNELS_REQUEST:
+   case OPUS_GET_PREDICTION_DISABLED_REQUEST:
+   {
+      OpusEncoder *enc;
+      /* For int32* GET params, just query the first stream */
+      opus_int32 *value = va_arg(ap, opus_int32*);
+      enc = (OpusEncoder*)ptr;
+      ret = opus_encoder_ctl(enc, request, value);
+   }
+   break;
+   case OPUS_GET_FINAL_RANGE_REQUEST:
+   {
+      int s;
+      opus_uint32 *value = va_arg(ap, opus_uint32*);
+      opus_uint32 tmp;
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      *value=0;
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, request, &tmp);
+         if (ret != OPUS_OK) break;
+         *value ^= tmp;
+      }
+   }
+   break;
+   case OPUS_SET_LSB_DEPTH_REQUEST:
+   case OPUS_SET_COMPLEXITY_REQUEST:
+   case OPUS_SET_VBR_REQUEST:
+   case OPUS_SET_VBR_CONSTRAINT_REQUEST:
+   case OPUS_SET_MAX_BANDWIDTH_REQUEST:
+   case OPUS_SET_BANDWIDTH_REQUEST:
+   case OPUS_SET_SIGNAL_REQUEST:
+   case OPUS_SET_APPLICATION_REQUEST:
+   case OPUS_SET_INBAND_FEC_REQUEST:
+   case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
+   case OPUS_SET_DTX_REQUEST:
+   case OPUS_SET_FORCE_MODE_REQUEST:
+   case OPUS_SET_FORCE_CHANNELS_REQUEST:
+   case OPUS_SET_PREDICTION_DISABLED_REQUEST:
+   {
+      int s;
+      /* This works for int32 params */
+      opus_int32 value = va_arg(ap, opus_int32);
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, request, value);
+         if (ret != OPUS_OK)
+            break;
+      }
+   }
+   break;
+   case OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST:
+   {
+      int s;
+      opus_int32 stream_id;
+      OpusEncoder **value;
+      stream_id = va_arg(ap, opus_int32);
+      if (stream_id<0 || stream_id >= st->layout.nb_streams)
+         ret = OPUS_BAD_ARG;
+      value = va_arg(ap, OpusEncoder**);
+      if (!value)
+      {
+         goto bad_arg;
+      }
+      for (s=0;s<stream_id;s++)
+      {
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+      }
+      *value = (OpusEncoder*)ptr;
+   }
+   break;
+   case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+       opus_int32 value = va_arg(ap, opus_int32);
+       st->variable_duration = value;
+   }
+   break;
+   case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+       opus_int32 *value = va_arg(ap, opus_int32*);
+       if (!value)
+       {
+          goto bad_arg;
+       }
+       *value = st->variable_duration;
+   }
+   break;
+   case OPUS_RESET_STATE:
+   {
+      int s;
+      st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0;
+      if (st->surround)
+      {
+         OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels);
+         OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120);
+      }
+      for (s=0;s<st->layout.nb_streams;s++)
+      {
+         OpusEncoder *enc;
+         enc = (OpusEncoder*)ptr;
+         if (s < st->layout.nb_coupled_streams)
+            ptr += align(coupled_size);
+         else
+            ptr += align(mono_size);
+         ret = opus_encoder_ctl(enc, OPUS_RESET_STATE);
+         if (ret != OPUS_OK)
+            break;
+      }
+   }
+   break;
+   default:
+      ret = OPUS_UNIMPLEMENTED;
+      break;
+   }
+
+   va_end(ap);
+   return ret;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+}
+
+void opus_multistream_encoder_destroy(OpusMSEncoder *st)
+{
+    opus_free(st);
+}
diff --git a/drivers/opus/opus_private.h b/drivers/opus/opus_private.h
new file mode 100644
index 0000000000..83225f2b6c
--- /dev/null
+++ b/drivers/opus/opus_private.h
@@ -0,0 +1,129 @@
+/* Copyright (c) 2012 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef OPUS_PRIVATE_H
+#define OPUS_PRIVATE_H
+
+#include "arch.h"
+#include "opus.h"
+#include "celt.h"
+
+struct OpusRepacketizer {
+   unsigned char toc;
+   int nb_frames;
+   const unsigned char *frames[48];
+   opus_int16 len[48];
+   int framesize;
+};
+
+typedef struct ChannelLayout {
+   int nb_channels;
+   int nb_streams;
+   int nb_coupled_streams;
+   unsigned char mapping[256];
+} ChannelLayout;
+
+int validate_layout(const ChannelLayout *layout);
+int get_left_channel(const ChannelLayout *layout, int stream_id, int prev);
+int get_right_channel(const ChannelLayout *layout, int stream_id, int prev);
+int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
+
+
+
+#define MODE_SILK_ONLY          1000
+#define MODE_HYBRID             1001
+#define MODE_CELT_ONLY          1002
+
+#define OPUS_SET_VOICE_RATIO_REQUEST         11018
+#define OPUS_GET_VOICE_RATIO_REQUEST         11019
+
+/** Configures the encoder's expected percentage of voice
+  * opposed to music or other signals.
+  *
+  * @note This interface is currently more aspiration than actuality. It's
+  * ultimately expected to bias an automatic signal classifier, but it currently
+  * just shifts the static bitrate to mode mapping around a little bit.
+  *
+  * @param[in] x <tt>int</tt>:   Voice percentage in the range 0-100, inclusive.
+  * @hideinitializer */
+#define OPUS_SET_VOICE_RATIO(x) OPUS_SET_VOICE_RATIO_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured voice ratio value, @see OPUS_SET_VOICE_RATIO
+  *
+  * @param[out] x <tt>int*</tt>:  Voice percentage in the range 0-100, inclusive.
+  * @hideinitializer */
+#define OPUS_GET_VOICE_RATIO(x) OPUS_GET_VOICE_RATIO_REQUEST, __opus_check_int_ptr(x)
+
+
+#define OPUS_SET_FORCE_MODE_REQUEST    11002
+#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
+
+typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
+void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, float *mem, int buffering,
+                downmix_func downmix);
+
+int encode_size(int size, unsigned char *data);
+
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
+
+opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
+      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+      int delay_compensation, downmix_func downmix
+#ifndef DISABLE_FLOAT_API
+      , float *subframe_mem
+#endif
+      );
+
+opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
+      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
+
+int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
+      opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
+      opus_int32 *packet_offset, int soft_clip);
+
+/* Make sure everything's aligned to sizeof(void *) bytes */
+static OPUS_INLINE int align(int i)
+{
+    return (i+(int)sizeof(void *)-1)&-(int)sizeof(void *);
+}
+
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
+      int self_delimited, unsigned char *out_toc,
+      const unsigned char *frames[48], opus_int16 size[48],
+      int *payload_offset, opus_int32 *packet_offset);
+
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+      unsigned char *data, opus_int32 maxlen, int self_delimited, int pad);
+
+int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len);
+
+#endif /* OPUS_PRIVATE_H */
diff --git a/drivers/opus/opus_types.h b/drivers/opus/opus_types.h
new file mode 100644
index 0000000000..b28e03aea2
--- /dev/null
+++ b/drivers/opus/opus_types.h
@@ -0,0 +1,159 @@
+/* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */
+/* Modified by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* opus_types.h based on ogg_types.h from libogg */
+
+/**
+   @file opus_types.h
+   @brief Opus reference implementation types
+*/
+#ifndef OPUS_TYPES_H
+#define OPUS_TYPES_H
+
+/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */
+#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H))
+#include <stdint.h>
+
+   typedef int16_t opus_int16;
+   typedef uint16_t opus_uint16;
+   typedef int32_t opus_int32;
+   typedef uint32_t opus_uint32;
+#elif defined(_WIN32)
+
+#  if defined(__CYGWIN__)
+#    include <_G_config.h>
+     typedef _G_int32_t opus_int32;
+     typedef _G_uint32_t opus_uint32;
+     typedef _G_int16 opus_int16;
+     typedef _G_uint16 opus_uint16;
+#  elif defined(__MINGW32__)
+     typedef short opus_int16;
+     typedef unsigned short opus_uint16;
+     typedef int opus_int32;
+     typedef unsigned int opus_uint32;
+#  elif defined(__MWERKS__)
+     typedef int opus_int32;
+     typedef unsigned int opus_uint32;
+     typedef short opus_int16;
+     typedef unsigned short opus_uint16;
+#  else
+     /* MSVC/Borland */
+     typedef __int32 opus_int32;
+     typedef unsigned __int32 opus_uint32;
+     typedef __int16 opus_int16;
+     typedef unsigned __int16 opus_uint16;
+#  endif
+
+#elif defined(__MACOS__)
+
+#  include <sys/types.h>
+   typedef SInt16 opus_int16;
+   typedef UInt16 opus_uint16;
+   typedef SInt32 opus_int32;
+   typedef UInt32 opus_uint32;
+
+#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
+
+#  include <sys/types.h>
+   typedef int16_t opus_int16;
+   typedef u_int16_t opus_uint16;
+   typedef int32_t opus_int32;
+   typedef u_int32_t opus_uint32;
+
+#elif defined(__BEOS__)
+
+   /* Be */
+#  include <inttypes.h>
+   typedef int16 opus_int16;
+   typedef u_int16 opus_uint16;
+   typedef int32_t opus_int32;
+   typedef u_int32_t opus_uint32;
+
+#elif defined (__EMX__)
+
+   /* OS/2 GCC */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined (DJGPP)
+
+   /* DJGPP */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined(R5900)
+
+   /* PS2 EE */
+   typedef int opus_int32;
+   typedef unsigned opus_uint32;
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+
+#elif defined(__SYMBIAN32__)
+
+   /* Symbian GCC */
+   typedef signed short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef signed int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
+
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef long opus_int32;
+   typedef unsigned long opus_uint32;
+
+#elif defined(CONFIG_TI_C6X)
+
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#else
+
+   /* Give up, take a reasonable guess */
+   typedef short opus_int16;
+   typedef unsigned short opus_uint16;
+   typedef int opus_int32;
+   typedef unsigned int opus_uint32;
+
+#endif
+
+#define opus_int         int                     /* used for counters etc; at least 16 bits */
+#define opus_int64       long long
+#define opus_int8        signed char
+
+#define opus_uint        unsigned int            /* used for counters etc; at least 16 bits */
+#define opus_uint64      unsigned long long
+#define opus_uint8       unsigned char
+
+#endif  /* OPUS_TYPES_H */
diff --git a/drivers/opus/opusfile.c b/drivers/opus/opusfile.c
new file mode 100644
index 0000000000..1e7497f6cd
--- /dev/null
+++ b/drivers/opus/opusfile.c
@@ -0,0 +1,3158 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012           *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
+
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <math.h>
+
+#include "opusfile.h"
+
+/*This implementation is largely based off of libvorbisfile.
+  All of the Ogg bits work roughly the same, though I have made some
+   "improvements" that have not been folded back there, yet.*/
+
+/*A 'chained bitstream' is an Ogg Opus bitstream that contains more than one
+   logical bitstream arranged end to end (the only form of Ogg multiplexing
+   supported by this library.
+  Grouping (parallel multiplexing) is not supported, except to the extent that
+   if there are multiple logical Ogg streams in a single link of the chain, we
+   will ignore all but the first Opus stream we find.*/
+
+/*An Ogg Opus file can be played beginning to end (streamed) without worrying
+   ahead of time about chaining (see opusdec from the opus-tools package).
+  If we have the whole file, however, and want random access
+   (seeking/scrubbing) or desire to know the total length/time of a file, we
+   need to account for the possibility of chaining.*/
+
+/*We can handle things a number of ways.
+  We can determine the entire bitstream structure right off the bat, or find
+   pieces on demand.
+  This library determines and caches structure for the entire bitstream, but
+   builds a virtual decoder on the fly when moving between links in the chain.*/
+
+/*There are also different ways to implement seeking.
+  Enough information exists in an Ogg bitstream to seek to sample-granularity
+   positions in the output.
+  Or, one can seek by picking some portion of the stream roughly in the desired
+   area if we only want coarse navigation through the stream.
+  We implement and expose both strategies.*/
+
+/*The maximum number of bytes in a page (including the page headers).*/
+#define OP_PAGE_SIZE_MAX  (65307)
+/*The default amount to seek backwards per step when trying to find the
+   previous page.
+  This must be at least as large as the maximum size of a page.*/
+#define OP_CHUNK_SIZE     (65536)
+/*The maximum amount to seek backwards per step when trying to find the
+   previous page.*/
+#define OP_CHUNK_SIZE_MAX (1024*(opus_int32)1024)
+/*A smaller read size is needed for low-rate streaming.*/
+#define OP_READ_SIZE      (2048)
+
+int op_test(OpusHead *_head,
+ const unsigned char *_initial_data,size_t _initial_bytes){
+  ogg_sync_state  oy;
+  char           *data;
+  int             err;
+  /*The first page of a normal Opus file will be at most 57 bytes (27 Ogg
+     page header bytes + 1 lacing value + 21 Opus header bytes + 8 channel
+     mapping bytes).
+    It will be at least 47 bytes (27 Ogg page header bytes + 1 lacing value +
+     19 Opus header bytes using channel mapping family 0).
+    If we don't have at least that much data, give up now.*/
+  if(_initial_bytes<47)return OP_FALSE;
+  /*Only proceed if we start with the magic OggS string.
+    This is to prevent us spending a lot of time allocating memory and looking
+     for Ogg pages in non-Ogg files.*/
+  if(memcmp(_initial_data,"OggS",4)!=0)return OP_ENOTFORMAT;
+  ogg_sync_init(&oy);
+  data=ogg_sync_buffer(&oy,_initial_bytes);
+  if(data!=NULL){
+    ogg_stream_state os;
+    ogg_page         og;
+    int              ret;
+    memcpy(data,_initial_data,_initial_bytes);
+    ogg_sync_wrote(&oy,_initial_bytes);
+    ogg_stream_init(&os,-1);
+    err=OP_FALSE;
+    do{
+      ogg_packet op;
+      ret=ogg_sync_pageout(&oy,&og);
+      /*Ignore holes.*/
+      if(ret<0)continue;
+      /*Stop if we run out of data.*/
+      if(!ret)break;
+      ogg_stream_reset_serialno(&os,ogg_page_serialno(&og));
+      ogg_stream_pagein(&os,&og);
+      /*Only process the first packet on this page (if it's a BOS packet,
+         it's required to be the only one).*/
+      if(ogg_stream_packetout(&os,&op)==1){
+        if(op.b_o_s){
+          ret=opus_head_parse(_head,op.packet,op.bytes);
+          /*If this didn't look like Opus, keep going.*/
+          if(ret==OP_ENOTFORMAT)continue;
+          /*Otherwise we're done, one way or another.*/
+          err=ret;
+        }
+        /*We finished parsing the headers.
+          There is no Opus to be found.*/
+        else err=OP_ENOTFORMAT;
+      }
+    }
+    while(err==OP_FALSE);
+    ogg_stream_clear(&os);
+  }
+  else err=OP_EFAULT;
+  ogg_sync_clear(&oy);
+  return err;
+}
+
+/*Many, many internal helpers.
+  The intention is not to be confusing.
+  Rampant duplication and monolithic function implementation (though we do have
+   some large, omnibus functions still) would be harder to understand anyway.
+  The high level functions are last.
+  Begin grokking near the end of the file if you prefer to read things
+   top-down.*/
+
+/*The read/seek functions track absolute position within the stream.*/
+
+/*Read a little more data from the file/pipe into the ogg_sync framer.
+  _nbytes: The maximum number of bytes to read.
+  Return: A positive number of bytes read on success, 0 on end-of-file, or a
+           negative value on failure.*/
+static int op_get_data(OggOpusFile *_of,int _nbytes){
+  unsigned char *buffer;
+  int            nbytes;
+  OP_ASSERT(_nbytes>0);
+  buffer=(unsigned char *)ogg_sync_buffer(&_of->oy,_nbytes);
+  nbytes=(int)(*_of->callbacks.read)(_of->source,buffer,_nbytes);
+  OP_ASSERT(nbytes<=_nbytes);
+  if(OP_LIKELY(nbytes>0))ogg_sync_wrote(&_of->oy,nbytes);
+  return nbytes;
+}
+
+/*Save a tiny smidge of verbosity to make the code more readable.*/
+static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){
+  if(_offset==_of->offset)return 0;
+  if(_of->callbacks.seek==NULL
+   ||(*_of->callbacks.seek)(_of->source,_offset,SEEK_SET)){
+    return OP_EREAD;
+  }
+  _of->offset=_offset;
+  ogg_sync_reset(&_of->oy);
+  return 0;
+}
+
+/*Get the current position indicator of the underlying source.
+  This should be the same as the value reported by tell().*/
+static opus_int64 op_position(const OggOpusFile *_of){
+  /*The current position indicator is _not_ simply offset.
+    We may also have unprocessed, buffered data in the sync state.*/
+  return _of->offset+_of->oy.fill-_of->oy.returned;
+}
+
+/*From the head of the stream, get the next page.
+  _boundary specifies if the function is allowed to fetch more data from the
+   stream (and how much) or only use internally buffered data.
+  _boundary: -1: Unbounded search.
+              0: Read no additional data.
+                 Use only cached data.
+              n: Search for the start of a new page up to file position n.
+  Return: n>=0:       Found a page at absolute offset n.
+          OP_FALSE:   Hit the _boundary limit.
+          OP_EREAD:   An underlying read operation failed.
+          OP_BADLINK: We hit end-of-file before reaching _boundary.*/
+static opus_int64 op_get_next_page(OggOpusFile *_of,ogg_page *_og,
+ opus_int64 _boundary){
+  while(_boundary<=0||_of->offset<_boundary){
+    int more;
+    more=ogg_sync_pageseek(&_of->oy,_og);
+    /*Skipped (-more) bytes.*/
+    if(OP_UNLIKELY(more<0))_of->offset-=more;
+    else if(more==0){
+      int read_nbytes;
+      int ret;
+      /*Send more paramedics.*/
+      if(!_boundary)return OP_FALSE;
+      if(_boundary<0)read_nbytes=OP_READ_SIZE;
+      else{
+        opus_int64 position;
+        position=op_position(_of);
+        if(position>=_boundary)return OP_FALSE;
+        read_nbytes=(int)OP_MIN(_boundary-position,OP_READ_SIZE);
+      }
+      ret=op_get_data(_of,read_nbytes);
+      if(OP_UNLIKELY(ret<0))return OP_EREAD;
+      if(OP_UNLIKELY(ret==0)){
+        /*Only fail cleanly on EOF if we didn't have a known boundary.
+          Otherwise, we should have been able to reach that boundary, and this
+           is a fatal error.*/
+        return OP_UNLIKELY(_boundary<0)?OP_FALSE:OP_EBADLINK;
+      }
+    }
+    else{
+      /*Got a page.
+        Return the page start offset and advance the internal offset past the
+         page end.*/
+      opus_int64 page_offset;
+      page_offset=_of->offset;
+      _of->offset+=more;
+      OP_ASSERT(page_offset>=0);
+      return page_offset;
+    }
+  }
+  return OP_FALSE;
+}
+
+static int op_add_serialno(const ogg_page *_og,
+ ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){
+  ogg_uint32_t *serialnos;
+  int           nserialnos;
+  int           cserialnos;
+  ogg_uint32_t s;
+  s=ogg_page_serialno(_og);
+  serialnos=*_serialnos;
+  nserialnos=*_nserialnos;
+  cserialnos=*_cserialnos;
+  if(OP_UNLIKELY(nserialnos>=cserialnos)){
+    if(OP_UNLIKELY(cserialnos>INT_MAX-1>>1))return OP_EFAULT;
+    cserialnos=2*cserialnos+1;
+    OP_ASSERT(nserialnos<cserialnos);
+    serialnos=(ogg_uint32_t *)_ogg_realloc(serialnos,
+     sizeof(*serialnos)*cserialnos);
+    if(OP_UNLIKELY(serialnos==NULL))return OP_EFAULT;
+  }
+  serialnos[nserialnos++]=s;
+  *_serialnos=serialnos;
+  *_nserialnos=nserialnos;
+  *_cserialnos=cserialnos;
+  return 0;
+}
+
+/*Returns nonzero if found.*/
+static int op_lookup_serialno(ogg_uint32_t _s,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  int i;
+  for(i=0;i<_nserialnos&&_serialnos[i]!=_s;i++);
+  return i<_nserialnos;
+}
+
+static int op_lookup_page_serialno(const ogg_page *_og,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  return op_lookup_serialno(ogg_page_serialno(_og),_serialnos,_nserialnos);
+}
+
+typedef struct OpusSeekRecord OpusSeekRecord;
+
+/*We use this to remember the pages we found while enumerating the links of a
+   chained stream.
+  We keep track of the starting and ending offsets, as well as the point we
+   started searching from, so we know where to bisect.
+  We also keep the serial number, so we can tell if the page belonged to the
+   current link or not, as well as the granule position, to aid in estimating
+   the start of the link.*/
+struct OpusSeekRecord{
+  /*The earliest byte we know of such that reading forward from it causes
+     capture to be regained at this page.*/
+  opus_int64   search_start;
+  /*The offset of this page.*/
+  opus_int64   offset;
+  /*The size of this page.*/
+  opus_int32   size;
+  /*The serial number of this page.*/
+  ogg_uint32_t serialno;
+  /*The granule position of this page.*/
+  ogg_int64_t  gp;
+};
+
+/*Find the last page beginning before _offset with a valid granule position.
+  There is no '_boundary' parameter as it will always have to read more data.
+  This is much dirtier than the above, as Ogg doesn't have any backward search
+   linkage.
+  This search prefers pages of the specified serial number.
+  If a page of the specified serial number is spotted during the
+   seek-back-and-read-forward, it will return the info of last page of the
+   matching serial number, instead of the very last page, unless the very last
+   page belongs to a different link than preferred serial number.
+  If no page of the specified serial number is seen, it will return the info of
+   the last page.
+  [out] _sr:   Returns information about the page that was found on success.
+  _offset:     The _offset before which to find a page.
+               Any page returned will consist of data entirely before _offset.
+  _serialno:   The preferred serial number.
+               If a page with this serial number is found, it will be returned
+                even if another page in the same link is found closer to
+                _offset.
+               This is purely opportunistic: there is no guarantee such a page
+                will be found if it exists.
+  _serialnos:  The list of serial numbers in the link that contains the
+                preferred serial number.
+  _nserialnos: The number of serial numbers in the current link.
+  Return: 0 on success, or a negative value on failure.
+          OP_EREAD:    Failed to read more data (error or EOF).
+          OP_EBADLINK: We couldn't find a page even after seeking back to the
+                        start of the stream.*/
+static int op_get_prev_page_serial(OggOpusFile *_of,OpusSeekRecord *_sr,
+ opus_int64 _offset,ogg_uint32_t _serialno,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  OpusSeekRecord preferred_sr;
+  ogg_page       og;
+  opus_int64     begin;
+  opus_int64     end;
+  opus_int64     original_end;
+  opus_int32     chunk_size;
+  int            preferred_found;
+  original_end=end=begin=_offset;
+  preferred_found=0;
+  _offset=-1;
+  chunk_size=OP_CHUNK_SIZE;
+  do{
+    opus_int64 search_start;
+    int        ret;
+    OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX);
+    begin=OP_MAX(begin-chunk_size,0);
+    ret=op_seek_helper(_of,begin);
+    if(OP_UNLIKELY(ret<0))return ret;
+    search_start=begin;
+    while(_of->offset<end){
+      opus_int64   llret;
+      ogg_uint32_t serialno;
+      llret=op_get_next_page(_of,&og,end);
+      if(OP_UNLIKELY(llret<OP_FALSE))return (int)llret;
+      else if(llret==OP_FALSE)break;
+      serialno=ogg_page_serialno(&og);
+      /*Save the information for this page.
+        We're not interested in the page itself... just the serial number, byte
+         offset, page size, and granule position.*/
+      _sr->search_start=search_start;
+      _sr->offset=_offset=llret;
+      _sr->serialno=serialno;
+      OP_ASSERT(_of->offset-_offset>=0);
+      OP_ASSERT(_of->offset-_offset<=OP_PAGE_SIZE_MAX);
+      _sr->size=(opus_int32)(_of->offset-_offset);
+      _sr->gp=ogg_page_granulepos(&og);
+      /*If this page is from the stream we're looking for, remember it.*/
+      if(serialno==_serialno){
+        preferred_found=1;
+        *&preferred_sr=*_sr;
+      }
+      if(!op_lookup_serialno(serialno,_serialnos,_nserialnos)){
+        /*We fell off the end of the link, which means we seeked back too far
+           and shouldn't have been looking in that link to begin with.
+          If we found the preferred serial number, forget that we saw it.*/
+        preferred_found=0;
+      }
+      search_start=llret+1;
+    }
+    /*We started from the beginning of the stream and found nothing.
+      This should be impossible unless the contents of the source changed out
+       from under us after we read from it.*/
+    if(OP_UNLIKELY(!begin)&&OP_UNLIKELY(_offset<0))return OP_EBADLINK;
+    /*Bump up the chunk size.
+      This is mildly helpful when seeks are very expensive (http).*/
+    chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+    /*Avoid quadratic complexity if we hit an invalid patch of the file.*/
+    end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end);
+  }
+  while(_offset<0);
+  if(preferred_found)*_sr=*&preferred_sr;
+  return 0;
+}
+
+/*Find the last page beginning before _offset with the given serial number and
+   a valid granule position.
+  Unlike the above search, this continues until it finds such a page, but does
+   not stray outside the current link.
+  We could implement it (inefficiently) by calling op_get_prev_page_serial()
+   repeatedly until it returned a page that had both our preferred serial
+   number and a valid granule position, but doing it with a separate function
+   allows us to avoid repeatedly re-scanning valid pages from other streams as
+   we seek-back-and-read-forward.
+  [out] _gp:   Returns the granule position of the page that was found on
+                success.
+  _offset:     The _offset before which to find a page.
+               Any page returned will consist of data entirely before _offset.
+  _serialno:   The target serial number.
+  _serialnos:  The list of serial numbers in the link that contains the
+                preferred serial number.
+  _nserialnos: The number of serial numbers in the current link.
+  Return: The offset of the page on success, or a negative value on failure.
+          OP_EREAD:    Failed to read more data (error or EOF).
+          OP_EBADLINK: We couldn't find a page even after seeking back past the
+                        beginning of the link.*/
+static opus_int64 op_get_last_page(OggOpusFile *_of,ogg_int64_t *_gp,
+ opus_int64 _offset,ogg_uint32_t _serialno,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  ogg_page    og;
+  ogg_int64_t gp;
+  opus_int64  begin;
+  opus_int64  end;
+  opus_int64  original_end;
+  opus_int32  chunk_size;
+  /*The target serial number must belong to the current link.*/
+  OP_ASSERT(op_lookup_serialno(_serialno,_serialnos,_nserialnos));
+  original_end=end=begin=_offset;
+  _offset=-1;
+  /*We shouldn't have to initialize gp, but gcc is too dumb to figure out that
+     ret>=0 implies we entered the if(page_gp!=-1) block at least once.*/
+  gp=-1;
+  chunk_size=OP_CHUNK_SIZE;
+  do{
+    int left_link;
+    int ret;
+    OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX);
+    begin=OP_MAX(begin-chunk_size,0);
+    ret=op_seek_helper(_of,begin);
+    if(OP_UNLIKELY(ret<0))return ret;
+    left_link=0;
+    while(_of->offset<end){
+      opus_int64   llret;
+      ogg_uint32_t serialno;
+      llret=op_get_next_page(_of,&og,end);
+      if(OP_UNLIKELY(llret<OP_FALSE))return llret;
+      else if(llret==OP_FALSE)break;
+      serialno=ogg_page_serialno(&og);
+      if(serialno==_serialno){
+        ogg_int64_t page_gp;
+        /*The page is from the right stream...*/
+        page_gp=ogg_page_granulepos(&og);
+        if(page_gp!=-1){
+          /*And has a valid granule position.
+            Let's remember it.*/
+          _offset=llret;
+          gp=page_gp;
+        }
+      }
+      else if(OP_UNLIKELY(!op_lookup_serialno(serialno,
+       _serialnos,_nserialnos))){
+        /*We fell off the start of the link, which means we don't need to keep
+           seeking any farther back.*/
+        left_link=1;
+      }
+    }
+    /*We started from at or before the beginning of the link and found nothing.
+      This should be impossible unless the contents of the source changed out
+       from under us after we read from it.*/
+    if((OP_UNLIKELY(left_link)||OP_UNLIKELY(!begin))&&OP_UNLIKELY(_offset<0)){
+      return OP_EBADLINK;
+    }
+    /*Bump up the chunk size.
+      This is mildly helpful when seeks are very expensive (http).*/
+    chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+    /*Avoid quadratic complexity if we hit an invalid patch of the file.*/
+    end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end);
+  }
+  while(_offset<0);
+  *_gp=gp;
+  return _offset;
+}
+
+/*Uses the local ogg_stream storage in _of.
+  This is important for non-streaming input sources.*/
+static int op_fetch_headers_impl(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){
+  ogg_packet op;
+  int        ret;
+  if(_serialnos!=NULL)*_nserialnos=0;
+  /*Extract the serialnos of all BOS pages plus the first set of Opus headers
+     we see in the link.*/
+  while(ogg_page_bos(_og)){
+    if(_serialnos!=NULL){
+      if(OP_UNLIKELY(op_lookup_page_serialno(_og,*_serialnos,*_nserialnos))){
+        /*A dupe serialnumber in an initial header packet set==invalid stream.*/
+        return OP_EBADHEADER;
+      }
+      ret=op_add_serialno(_og,_serialnos,_nserialnos,_cserialnos);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    if(_of->ready_state<OP_STREAMSET){
+      /*We don't have an Opus stream in this link yet, so begin prospective
+         stream setup.
+        We need a stream to get packets.*/
+      ogg_stream_reset_serialno(&_of->os,ogg_page_serialno(_og));
+      ogg_stream_pagein(&_of->os,_og);
+      if(OP_LIKELY(ogg_stream_packetout(&_of->os,&op)>0)){
+        ret=opus_head_parse(_head,op.packet,op.bytes);
+        /*Found a valid Opus header.
+          Continue setup.*/
+        if(OP_LIKELY(ret>=0))_of->ready_state=OP_STREAMSET;
+        /*If it's just a stream type we don't recognize, ignore it.
+          Everything else is fatal.*/
+        else if(ret!=OP_ENOTFORMAT)return ret;
+      }
+    }
+    /*Get the next page.
+      No need to clamp the boundary offset against _of->end, as all errors
+       become OP_ENOTFORMAT or OP_EBADHEADER.*/
+    if(OP_UNLIKELY(op_get_next_page(_of,_og,
+     OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+      return _of->ready_state<OP_STREAMSET?OP_ENOTFORMAT:OP_EBADHEADER;
+    }
+  }
+  if(OP_UNLIKELY(_of->ready_state!=OP_STREAMSET))return OP_ENOTFORMAT;
+  /*If the first non-header page belonged to our Opus stream, submit it.*/
+  if(_of->os.serialno==ogg_page_serialno(_og))ogg_stream_pagein(&_of->os,_og);
+  /*Loop getting packets.*/
+  for(;;){
+    switch(ogg_stream_packetout(&_of->os,&op)){
+      case 0:{
+        /*Loop getting pages.*/
+        for(;;){
+          /*No need to clamp the boundary offset against _of->end, as all
+             errors become OP_EBADHEADER.*/
+          if(OP_UNLIKELY(op_get_next_page(_of,_og,
+           OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+            return OP_EBADHEADER;
+          }
+          /*If this page belongs to the correct stream, go parse it.*/
+          if(_of->os.serialno==ogg_page_serialno(_og)){
+            ogg_stream_pagein(&_of->os,_og);
+            break;
+          }
+          /*If the link ends before we see the Opus comment header, abort.*/
+          if(OP_UNLIKELY(ogg_page_bos(_og)))return OP_EBADHEADER;
+          /*Otherwise, keep looking.*/
+        }
+      }break;
+      /*We shouldn't get a hole in the headers!*/
+      case -1:return OP_EBADHEADER;
+      default:{
+        /*Got a packet.
+          It should be the comment header.*/
+        ret=opus_tags_parse(_tags,op.packet,op.bytes);
+        if(OP_UNLIKELY(ret<0))return ret;
+        /*Make sure the page terminated at the end of the comment header.
+          If there is another packet on the page, or part of a packet, then
+           reject the stream.
+          Otherwise seekable sources won't be able to seek back to the start
+           properly.*/
+        ret=ogg_stream_packetout(&_of->os,&op);
+        if(OP_UNLIKELY(ret!=0)
+         ||OP_UNLIKELY(_og->header[_og->header_len-1]==255)){
+          /*If we fail, the caller assumes our tags are uninitialized.*/
+          opus_tags_clear(_tags);
+          return OP_EBADHEADER;
+        }
+        return 0;
+      }
+    }
+  }
+}
+
+static int op_fetch_headers(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){
+  ogg_page og;
+  int      ret;
+  if(!_og){
+    /*No need to clamp the boundary offset against _of->end, as all errors
+       become OP_ENOTFORMAT.*/
+    if(OP_UNLIKELY(op_get_next_page(_of,&og,
+     OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+      return OP_ENOTFORMAT;
+    }
+    _og=&og;
+  }
+  _of->ready_state=OP_OPENED;
+  ret=op_fetch_headers_impl(_of,_head,_tags,_serialnos,_nserialnos,
+   _cserialnos,_og);
+  /*Revert back from OP_STREAMSET to OP_OPENED on failure, to prevent
+     double-free of the tags in an unseekable stream.*/
+  if(OP_UNLIKELY(ret<0))_of->ready_state=OP_OPENED;
+  return ret;
+}
+
+/*Granule position manipulation routines.
+  A granule position is defined to be an unsigned 64-bit integer, with the
+   special value -1 in two's complement indicating an unset or invalid granule
+   position.
+  We are not guaranteed to have an unsigned 64-bit type, so we construct the
+   following routines that
+   a) Properly order negative numbers as larger than positive numbers, and
+   b) Check for underflow or overflow past the special -1 value.
+  This lets us operate on the full, valid range of granule positions in a
+   consistent and safe manner.
+  This full range is organized into distinct regions:
+   [ -1 (invalid) ][ 0 ... OP_INT64_MAX ][ OP_INT64_MIN ... -2 ][-1 (invalid) ]
+
+  No one should actually use granule positions so large that they're negative,
+   even if they are technically valid, as very little software handles them
+   correctly (including most of Xiph.Org's).
+  This library also refuses to support durations so large they won't fit in a
+   signed 64-bit integer (to avoid exposing this mess to the application, and
+   to simplify a good deal of internal arithmetic), so the only way to use them
+   successfully is if pcm_start is very large.
+  This means there isn't anything you can do with negative granule positions
+   that you couldn't have done with purely non-negative ones.
+  The main purpose of these routines is to allow us to think very explicitly
+   about the possible failure cases of all granule position manipulations.*/
+
+/*Safely adds a small signed integer to a valid (not -1) granule position.
+  The result can use the full 64-bit range of values (both positive and
+   negative), but will fail on overflow (wrapping past -1; wrapping past
+   OP_INT64_MAX is explicitly okay).
+  [out] _dst_gp: The resulting granule position.
+                 Only modified on success.
+  _src_gp:       The granule position to add to.
+                 This must not be -1.
+  _delta:        The amount to add.
+                 This is allowed to be up to 32 bits to support the maximum
+                  duration of a single Ogg page (255 packets * 120 ms per
+                  packet == 1,468,800 samples at 48 kHz).
+  Return: 0 on success, or OP_EINVAL if the result would wrap around past -1.*/
+static int op_granpos_add(ogg_int64_t *_dst_gp,ogg_int64_t _src_gp,
+ opus_int32 _delta){
+  /*The code below handles this case correctly, but there's no reason we
+     should ever be called with these values, so make sure we aren't.*/
+  OP_ASSERT(_src_gp!=-1);
+  if(_delta>0){
+    /*Adding this amount to the granule position would overflow its 64-bit
+       range.*/
+    if(OP_UNLIKELY(_src_gp<0)&&OP_UNLIKELY(_src_gp>=-1-_delta))return OP_EINVAL;
+    if(OP_UNLIKELY(_src_gp>OP_INT64_MAX-_delta)){
+      /*Adding this amount to the granule position would overflow the positive
+         half of its 64-bit range.
+        Since signed overflow is undefined in C, do it in a way the compiler
+         isn't allowed to screw up.*/
+      _delta-=(opus_int32)(OP_INT64_MAX-_src_gp)+1;
+      _src_gp=OP_INT64_MIN;
+    }
+  }
+  else if(_delta<0){
+    /*Subtracting this amount from the granule position would underflow its
+       64-bit range.*/
+    if(_src_gp>=0&&OP_UNLIKELY(_src_gp<-_delta))return OP_EINVAL;
+    if(OP_UNLIKELY(_src_gp<OP_INT64_MIN-_delta)){
+      /*Subtracting this amount from the granule position would underflow the
+         negative half of its 64-bit range.
+        Since signed underflow is undefined in C, do it in a way the compiler
+         isn't allowed to screw up.*/
+      _delta+=(opus_int32)(_src_gp-OP_INT64_MIN)+1;
+      _src_gp=OP_INT64_MAX;
+    }
+  }
+  *_dst_gp=_src_gp+_delta;
+  return 0;
+}
+
+/*Safely computes the difference between two granule positions.
+  The difference must fit in a signed 64-bit integer, or the function fails.
+  It correctly handles the case where the granule position has wrapped around
+   from positive values to negative ones.
+  [out] _delta: The difference between the granule positions.
+                Only modified on success.
+  _gp_a:        The granule position to subtract from.
+                This must not be -1.
+  _gp_b:        The granule position to subtract.
+                This must not be -1.
+  Return: 0 on success, or OP_EINVAL if the result would not fit in a signed
+           64-bit integer.*/
+static int op_granpos_diff(ogg_int64_t *_delta,
+ ogg_int64_t _gp_a,ogg_int64_t _gp_b){
+  int gp_a_negative;
+  int gp_b_negative;
+  /*The code below handles these cases correctly, but there's no reason we
+     should ever be called with these values, so make sure we aren't.*/
+  OP_ASSERT(_gp_a!=-1);
+  OP_ASSERT(_gp_b!=-1);
+  gp_a_negative=OP_UNLIKELY(_gp_a<0);
+  gp_b_negative=OP_UNLIKELY(_gp_b<0);
+  if(OP_UNLIKELY(gp_a_negative^gp_b_negative)){
+    ogg_int64_t da;
+    ogg_int64_t db;
+    if(gp_a_negative){
+      /*_gp_a has wrapped to a negative value but _gp_b hasn't: the difference
+         should be positive.*/
+      /*Step 1: Handle wrapping.*/
+      /*_gp_a < 0 => da < 0.*/
+      da=(OP_INT64_MIN-_gp_a)-1;
+      /*_gp_b >= 0  => db >= 0.*/
+      db=OP_INT64_MAX-_gp_b;
+      /*Step 2: Check for overflow.*/
+      if(OP_UNLIKELY(OP_INT64_MAX+da<db))return OP_EINVAL;
+      *_delta=db-da;
+    }
+    else{
+      /*_gp_b has wrapped to a negative value but _gp_a hasn't: the difference
+         should be negative.*/
+      /*Step 1: Handle wrapping.*/
+      /*_gp_a >= 0 => da <= 0*/
+      da=_gp_a+OP_INT64_MIN;
+      /*_gp_b < 0 => db <= 0*/
+      db=OP_INT64_MIN-_gp_b;
+      /*Step 2: Check for overflow.*/
+      if(OP_UNLIKELY(da<OP_INT64_MIN-db))return OP_EINVAL;
+      *_delta=da+db;
+    }
+  }
+  else *_delta=_gp_a-_gp_b;
+  return 0;
+}
+
+static int op_granpos_cmp(ogg_int64_t _gp_a,ogg_int64_t _gp_b){
+  /*The invalid granule position -1 should behave like NaN: neither greater
+     than nor less than any other granule position, nor equal to any other
+     granule position, including itself.
+    However, that means there isn't anything we could sensibly return from this
+     function for it.*/
+  OP_ASSERT(_gp_a!=-1);
+  OP_ASSERT(_gp_b!=-1);
+  /*Handle the wrapping cases.*/
+  if(OP_UNLIKELY(_gp_a<0)){
+    if(_gp_b>=0)return 1;
+    /*Else fall through.*/
+  }
+  else if(OP_UNLIKELY(_gp_b<0))return -1;
+  /*No wrapping case.*/
+  return (_gp_a>_gp_b)-(_gp_b>_gp_a);
+}
+
+/*Returns the duration of the packet (in samples at 48 kHz), or a negative
+   value on error.*/
+static int op_get_packet_duration(const unsigned char *_data,int _len){
+  int nframes;
+  int frame_size;
+  int nsamples;
+  nframes=opus_packet_get_nb_frames(_data,_len);
+  if(OP_UNLIKELY(nframes<0))return OP_EBADPACKET;
+  frame_size=opus_packet_get_samples_per_frame(_data,48000);
+  nsamples=nframes*frame_size;
+  if(OP_UNLIKELY(nsamples>120*48))return OP_EBADPACKET;
+  return nsamples;
+}
+
+/*This function more properly belongs in info.c, but we define it here to allow
+   the static granule position manipulation functions to remain static.*/
+ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp){
+  opus_int32 pre_skip;
+  pre_skip=_head->pre_skip;
+  if(_gp!=-1&&op_granpos_add(&_gp,_gp,-pre_skip))_gp=-1;
+  return _gp;
+}
+
+/*Grab all the packets currently in the stream state, and compute their
+   durations.
+  _of->op_count is set to the number of packets collected.
+  [out] _durations: Returns the durations of the individual packets.
+  Return: The total duration of all packets, or OP_HOLE if there was a hole.*/
+static opus_int32 op_collect_audio_packets(OggOpusFile *_of,
+ int _durations[255]){
+  opus_int32 total_duration;
+  int        op_count;
+  /*Count the durations of all packets in the page.*/
+  op_count=0;
+  total_duration=0;
+  for(;;){
+    int ret;
+    /*This takes advantage of undocumented libogg behavior that returned
+       ogg_packet buffers are valid at least until the next page is
+       submitted.
+      Relying on this is not too terrible, as _none_ of the Ogg memory
+       ownership/lifetime rules are well-documented.
+      But I can read its code and know this will work.*/
+    ret=ogg_stream_packetout(&_of->os,_of->op+op_count);
+    if(!ret)break;
+    if(OP_UNLIKELY(ret<0)){
+      /*We shouldn't get holes in the middle of pages.*/
+      OP_ASSERT(op_count==0);
+      /*Set the return value and break out of the loop.
+        We want to make sure op_count gets set to 0, because we've ingested a
+         page, so any previously loaded packets are now invalid.*/
+      total_duration=OP_HOLE;
+      break;
+    }
+    /*Unless libogg is broken, we can't get more than 255 packets from a
+       single page.*/
+    OP_ASSERT(op_count<255);
+    _durations[op_count]=op_get_packet_duration(_of->op[op_count].packet,
+     _of->op[op_count].bytes);
+    if(OP_LIKELY(_durations[op_count]>0)){
+      /*With at most 255 packets on a page, this can't overflow.*/
+      total_duration+=_durations[op_count++];
+    }
+    /*Ignore packets with an invalid TOC sequence.*/
+    else if(op_count>0){
+      /*But save the granule position, if there was one.*/
+      _of->op[op_count-1].granulepos=_of->op[op_count].granulepos;
+    }
+  }
+  _of->op_pos=0;
+  _of->op_count=op_count;
+  return total_duration;
+}
+
+/*Starting from current cursor position, get the initial PCM offset of the next
+   page.
+  This also validates the granule position on the first page with a completed
+   audio data packet, as required by the spec.
+  If this link is completely empty (no pages with completed packets), then this
+   function sets pcm_start=pcm_end=0 and returns the BOS page of the next link
+   (if any).
+  In the seekable case, we initialize pcm_end=-1 before calling this function,
+   so that later we can detect that the link was empty before calling
+   op_find_final_pcm_offset().
+  [inout] _link: The link for which to find pcm_start.
+  [out] _og:     Returns the BOS page of the next link if this link was empty.
+                 In the unseekable case, we can then feed this to
+                  op_fetch_headers() to start the next link.
+                 The caller may pass NULL (e.g., for seekable streams), in
+                  which case this page will be discarded.
+  Return: 0 on success, 1 if there is a buffered BOS page available, or a
+           negative value on unrecoverable error.*/
+static int op_find_initial_pcm_offset(OggOpusFile *_of,
+ OggOpusLink *_link,ogg_page *_og){
+  ogg_page     og;
+  ogg_int64_t  pcm_start;
+  ogg_int64_t  prev_packet_gp;
+  ogg_int64_t  cur_page_gp;
+  ogg_uint32_t serialno;
+  opus_int32   total_duration;
+  int          durations[255];
+  int          cur_page_eos;
+  int          op_count;
+  int          pi;
+  if(_og==NULL)_og=&og;
+  serialno=_of->os.serialno;
+  op_count=0;
+  /*We shouldn't have to initialize total_duration, but gcc is too dumb to
+     figure out that op_count>0 implies we've been through the whole loop at
+     least once.*/
+  total_duration=0;
+  do{
+    opus_int64 llret;
+    llret=op_get_next_page(_of,_og,_of->end);
+    /*We should get a page unless the file is truncated or mangled.
+      Otherwise there are no audio data packets in the whole logical stream.*/
+    if(OP_UNLIKELY(llret<0)){
+      /*Fail if there was a read error.*/
+      if(llret<OP_FALSE)return (int)llret;
+      /*Fail if the pre-skip is non-zero, since it's asking us to skip more
+         samples than exist.*/
+      if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+      /*Set pcm_end and end_offset so we can skip the call to
+         op_find_final_pcm_offset().*/
+      _link->pcm_start=_link->pcm_end=0;
+      _link->end_offset=_link->data_offset;
+      return 0;
+    }
+    /*Similarly, if we hit the next link in the chain, we've gone too far.*/
+    if(OP_UNLIKELY(ogg_page_bos(_og))){
+      if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+      /*Set pcm_end and end_offset so we can skip the call to
+         op_find_final_pcm_offset().*/
+      _link->pcm_end=_link->pcm_start=0;
+      _link->end_offset=_link->data_offset;
+      /*Tell the caller we've got a buffered page for them.*/
+      return 1;
+    }
+    /*Ignore pages from other streams (not strictly necessary, because of the
+       checks in ogg_stream_pagein(), but saves some work).*/
+    if(serialno!=(ogg_uint32_t)ogg_page_serialno(_og))continue;
+    ogg_stream_pagein(&_of->os,_og);
+    /*Bitrate tracking: add the header's bytes here.
+      The body bytes are counted when we consume the packets.*/
+    _of->bytes_tracked+=_og->header_len;
+    /*Count the durations of all packets in the page.*/
+    do total_duration=op_collect_audio_packets(_of,durations);
+    /*Ignore holes.*/
+    while(OP_UNLIKELY(total_duration<0));
+    op_count=_of->op_count;
+  }
+  while(op_count<=0);
+  /*We found the first page with a completed audio data packet: actually look
+     at the granule position.
+    RFC 3533 says, "A special value of -1 (in two's complement) indicates that
+     no packets finish on this page," which does not say that a granule
+     position that is NOT -1 indicates that some packets DO finish on that page
+     (even though this was the intention, libogg itself violated this intention
+     for years before we fixed it).
+    The Ogg Opus specification only imposes its start-time requirements
+     on the granule position of the first page with completed packets,
+     so we ignore any set granule positions until then.*/
+  cur_page_gp=_of->op[op_count-1].granulepos;
+  /*But getting a packet without a valid granule position on the page is not
+     okay.*/
+  if(cur_page_gp==-1)return OP_EBADTIMESTAMP;
+  cur_page_eos=_of->op[op_count-1].e_o_s;
+  if(OP_LIKELY(!cur_page_eos)){
+    /*The EOS flag wasn't set.
+      Work backwards from the provided granule position to get the starting PCM
+       offset.*/
+    if(OP_UNLIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+      /*The starting granule position MUST not be smaller than the amount of
+         audio on the first page with completed packets.*/
+      return OP_EBADTIMESTAMP;
+    }
+  }
+  else{
+    /*The first page with completed packets was also the last.*/
+    if(OP_LIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+      /*If there's less audio on the page than indicated by the granule
+         position, then we're doing end-trimming, and the starting PCM offset
+         is zero by spec mandate.*/
+      pcm_start=0;
+      /*However, the end-trimming MUST not ask us to trim more samples than
+         exist after applying the pre-skip.*/
+      if(OP_UNLIKELY(op_granpos_cmp(cur_page_gp,_link->head.pre_skip)<0)){
+        return OP_EBADTIMESTAMP;
+      }
+    }
+  }
+  /*Timestamp the individual packets.*/
+  prev_packet_gp=pcm_start;
+  for(pi=0;pi<op_count;pi++){
+    if(cur_page_eos){
+      ogg_int64_t diff;
+      OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp));
+      diff=durations[pi]-diff;
+      /*If we have samples to trim...*/
+      if(diff>0){
+        /*If we trimmed the entire packet, stop (the spec says encoders
+           shouldn't do this, but we support it anyway).*/
+        if(OP_UNLIKELY(diff>durations[pi]))break;
+        _of->op[pi].granulepos=prev_packet_gp=cur_page_gp;
+        /*Move the EOS flag to this packet, if necessary, so we'll trim the
+           samples.*/
+        _of->op[pi].e_o_s=1;
+        continue;
+      }
+    }
+    /*Update the granule position as normal.*/
+    OP_ALWAYS_TRUE(!op_granpos_add(&_of->op[pi].granulepos,
+     prev_packet_gp,durations[pi]));
+    prev_packet_gp=_of->op[pi].granulepos;
+  }
+  /*Update the packet count after end-trimming.*/
+  _of->op_count=pi;
+  _of->cur_discard_count=_link->head.pre_skip;
+  _of->prev_packet_gp=_link->pcm_start=pcm_start;
+  return 0;
+}
+
+/*Starting from current cursor position, get the final PCM offset of the
+   previous page.
+  This also validates the duration of the link, which, while not strictly
+   required by the spec, we need to ensure duration calculations don't
+   overflow.
+  This is only done for seekable sources.
+  We must validate that op_find_initial_pcm_offset() succeeded for this link
+   before calling this function, otherwise it will scan the entire stream
+   backwards until it reaches the start, and then fail.*/
+static int op_find_final_pcm_offset(OggOpusFile *_of,
+ const ogg_uint32_t *_serialnos,int _nserialnos,OggOpusLink *_link,
+ opus_int64 _offset,ogg_uint32_t _end_serialno,ogg_int64_t _end_gp,
+ ogg_int64_t *_total_duration){
+  ogg_int64_t  total_duration;
+  ogg_int64_t  duration;
+  ogg_uint32_t cur_serialno;
+  /*For the time being, fetch end PCM offset the simple way.*/
+  cur_serialno=_link->serialno;
+  if(_end_serialno!=cur_serialno||_end_gp==-1){
+    _offset=op_get_last_page(_of,&_end_gp,_offset,
+     cur_serialno,_serialnos,_nserialnos);
+    if(OP_UNLIKELY(_offset<0))return (int)_offset;
+  }
+  /*At worst we should have found the first page with completed packets.*/
+  if(OP_UNLIKELY(_offset<_link->data_offset))return OP_EBADLINK;
+  /*This implementation requires that the difference between the first and last
+     granule positions in each link be representable in a signed, 64-bit
+     number, and that each link also have at least as many samples as the
+     pre-skip requires.*/
+  if(OP_UNLIKELY(op_granpos_diff(&duration,_end_gp,_link->pcm_start)<0)
+   ||OP_UNLIKELY(duration<_link->head.pre_skip)){
+    return OP_EBADTIMESTAMP;
+  }
+  /*We also require that the total duration be representable in a signed,
+     64-bit number.*/
+  duration-=_link->head.pre_skip;
+  total_duration=*_total_duration;
+  if(OP_UNLIKELY(OP_INT64_MAX-duration<total_duration))return OP_EBADTIMESTAMP;
+  *_total_duration=total_duration+duration;
+  _link->pcm_end=_end_gp;
+  _link->end_offset=_offset;
+  return 0;
+}
+
+/*Rescale the number _x from the range [0,_from] to [0,_to].
+  _from and _to must be positive.*/
+static opus_int64 op_rescale64(opus_int64 _x,opus_int64 _from,opus_int64 _to){
+  opus_int64 frac;
+  opus_int64 ret;
+  int        i;
+  if(_x>=_from)return _to;
+  if(_x<=0)return 0;
+  frac=0;
+  for(i=0;i<63;i++){
+    frac<<=1;
+    OP_ASSERT(_x<=_from);
+    if(_x>=_from>>1){
+      _x-=_from-_x;
+      frac|=1;
+    }
+    else _x<<=1;
+  }
+  ret=0;
+  for(i=0;i<63;i++){
+    if(frac&1)ret=(ret&_to&1)+(ret>>1)+(_to>>1);
+    else ret>>=1;
+    frac>>=1;
+  }
+  return ret;
+}
+
+/*The minimum granule position spacing allowed for making predictions.
+  This corresponds to about 1 second of audio at 48 kHz for both Opus and
+   Vorbis, or one keyframe interval in Theora with the default keyframe spacing
+   of 256.*/
+#define OP_GP_SPACING_MIN (48000)
+
+/*Try to estimate the location of the next link using the current seek
+   records, assuming the initial granule position of any streams we've found is
+   0.*/
+static opus_int64 op_predict_link_start(const OpusSeekRecord *_sr,int _nsr,
+ opus_int64 _searched,opus_int64 _end_searched,opus_int32 _bias){
+  opus_int64 bisect;
+  int        sri;
+  int        srj;
+  /*Require that we be at least OP_CHUNK_SIZE from the end.
+    We don't require that we be at least OP_CHUNK_SIZE from the beginning,
+     because if we are we'll just scan forward without seeking.*/
+  _end_searched-=OP_CHUNK_SIZE;
+  if(_searched>=_end_searched)return -1;
+  bisect=_end_searched;
+  for(sri=0;sri<_nsr;sri++){
+    ogg_int64_t  gp1;
+    ogg_int64_t  gp2_min;
+    ogg_uint32_t serialno1;
+    opus_int64   offset1;
+    /*If the granule position is negative, either it's invalid or we'd cause
+       overflow.*/
+    gp1=_sr[sri].gp;
+    if(gp1<0)continue;
+    /*We require some minimum distance between granule positions to make an
+       estimate.
+      We don't actually know what granule position scheme is being used,
+       because we have no idea what kind of stream these came from.
+      Therefore we require a minimum spacing between them, with the
+       expectation that while bitrates and granule position increments might
+       vary locally in quite complex ways, they are globally smooth.*/
+    if(OP_UNLIKELY(op_granpos_add(&gp2_min,gp1,OP_GP_SPACING_MIN)<0)){
+      /*No granule position would satisfy us.*/
+      continue;
+    }
+    offset1=_sr[sri].offset;
+    serialno1=_sr[sri].serialno;
+    for(srj=sri;srj-->0;){
+      ogg_int64_t gp2;
+      opus_int64  offset2;
+      opus_int64  num;
+      ogg_int64_t den;
+      ogg_int64_t ipart;
+      gp2=_sr[srj].gp;
+      if(gp2<gp2_min)continue;
+      /*Oh, and also make sure these came from the same stream.*/
+      if(_sr[srj].serialno!=serialno1)continue;
+      offset2=_sr[srj].offset;
+      /*For once, we can subtract with impunity.*/
+      den=gp2-gp1;
+      ipart=gp2/den;
+      num=offset2-offset1;
+      OP_ASSERT(num>0);
+      if(ipart>0&&(offset2-_searched)/ipart<num)continue;
+      offset2-=ipart*num;
+      gp2-=ipart*den;
+      offset2-=op_rescale64(gp2,den,num)-_bias;
+      if(offset2<_searched)continue;
+      bisect=OP_MIN(bisect,offset2);
+      break;
+    }
+  }
+  return bisect>=_end_searched?-1:bisect;
+}
+
+/*Finds each bitstream link, one at a time, using a bisection search.
+  This has to begin by knowing the offset of the first link's initial page.*/
+static int op_bisect_forward_serialno(OggOpusFile *_of,
+ opus_int64 _searched,OpusSeekRecord *_sr,int _csr,
+ ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){
+  ogg_page      og;
+  OggOpusLink  *links;
+  int           nlinks;
+  int           clinks;
+  ogg_uint32_t *serialnos;
+  int           nserialnos;
+  ogg_int64_t   total_duration;
+  int           nsr;
+  int           ret;
+  links=_of->links;
+  nlinks=clinks=_of->nlinks;
+  total_duration=0;
+  /*We start with one seek record, for the last page in the file.
+    We build up a list of records for places we seek to during link
+     enumeration.
+    This list is kept sorted in reverse order.
+    We only care about seek locations that were _not_ in the current link,
+     therefore we can add them one at a time to the end of the list as we
+     improve the lower bound on the location where the next link starts.*/
+  nsr=1;
+  for(;;){
+    opus_int64  end_searched;
+    opus_int64  bisect;
+    opus_int64  next;
+    opus_int64  last;
+    ogg_int64_t end_offset;
+    ogg_int64_t end_gp;
+    int         sri;
+    serialnos=*_serialnos;
+    nserialnos=*_nserialnos;
+    if(OP_UNLIKELY(nlinks>=clinks)){
+      if(OP_UNLIKELY(clinks>INT_MAX-1>>1))return OP_EFAULT;
+      clinks=2*clinks+1;
+      OP_ASSERT(nlinks<clinks);
+      links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*clinks);
+      if(OP_UNLIKELY(links==NULL))return OP_EFAULT;
+      _of->links=links;
+    }
+    /*Invariants:
+      We have the headers and serial numbers for the link beginning at 'begin'.
+      We have the offset and granule position of the last page in the file
+       (potentially not a page we care about).*/
+    /*Scan the seek records we already have to save us some bisection.*/
+    for(sri=0;sri<nsr;sri++){
+      if(op_lookup_serialno(_sr[sri].serialno,serialnos,nserialnos))break;
+    }
+    /*Is the last page in our current list of serial numbers?*/
+    if(sri<=0)break;
+    /*Last page wasn't found.
+      We have at least one more link.*/
+    last=-1;
+    end_searched=_sr[sri-1].search_start;
+    next=_sr[sri-1].offset;
+    end_gp=-1;
+    if(sri<nsr){
+      _searched=_sr[sri].offset+_sr[sri].size;
+      if(_sr[sri].serialno==links[nlinks-1].serialno){
+        end_gp=_sr[sri].gp;
+        end_offset=_sr[sri].offset;
+      }
+    }
+    nsr=sri;
+    bisect=-1;
+    /*If we've already found the end of at least one link, try to pick the
+       first bisection point at twice the average link size.
+      This is a good choice for files with lots of links that are all about the
+       same size.*/
+    if(nlinks>1){
+      opus_int64 last_offset;
+      opus_int64 avg_link_size;
+      opus_int64 upper_limit;
+      last_offset=links[nlinks-1].offset;
+      avg_link_size=last_offset/(nlinks-1);
+      upper_limit=end_searched-OP_CHUNK_SIZE-avg_link_size;
+      if(OP_LIKELY(last_offset>_searched-avg_link_size)
+       &&OP_LIKELY(last_offset<upper_limit)){
+        bisect=last_offset+avg_link_size;
+        if(OP_LIKELY(bisect<upper_limit))bisect+=avg_link_size;
+      }
+    }
+    /*We guard against garbage separating the last and first pages of two
+       links below.*/
+    while(_searched<end_searched){
+      opus_int32 next_bias;
+      /*If we don't have a better estimate, use simple bisection.*/
+      if(bisect==-1)bisect=_searched+(end_searched-_searched>>1);
+      /*If we're within OP_CHUNK_SIZE of the start, scan forward.*/
+      if(bisect-_searched<OP_CHUNK_SIZE)bisect=_searched;
+      /*Otherwise we're skipping data.
+        Forget the end page, if we saw one, as we might miss a later one.*/
+      else end_gp=-1;
+      ret=op_seek_helper(_of,bisect);
+      if(OP_UNLIKELY(ret<0))return ret;
+      last=op_get_next_page(_of,&og,_sr[nsr-1].offset);
+      if(OP_UNLIKELY(last<OP_FALSE))return (int)last;
+      next_bias=0;
+      if(last==OP_FALSE)end_searched=bisect;
+      else{
+        ogg_uint32_t serialno;
+        ogg_int64_t  gp;
+        serialno=ogg_page_serialno(&og);
+        gp=ogg_page_granulepos(&og);
+        if(!op_lookup_serialno(serialno,serialnos,nserialnos)){
+          end_searched=bisect;
+          next=last;
+          /*In reality we should always have enough room, but be paranoid.*/
+          if(OP_LIKELY(nsr<_csr)){
+            _sr[nsr].search_start=bisect;
+            _sr[nsr].offset=last;
+            OP_ASSERT(_of->offset-last>=0);
+            OP_ASSERT(_of->offset-last<=OP_PAGE_SIZE_MAX);
+            _sr[nsr].size=(opus_int32)(_of->offset-last);
+            _sr[nsr].serialno=serialno;
+            _sr[nsr].gp=gp;
+            nsr++;
+          }
+        }
+        else{
+          _searched=_of->offset;
+          next_bias=OP_CHUNK_SIZE;
+          if(serialno==links[nlinks-1].serialno){
+            /*This page was from the stream we want, remember it.
+              If it's the last such page in the link, we won't have to go back
+               looking for it later.*/
+            end_gp=gp;
+            end_offset=last;
+          }
+        }
+      }
+      bisect=op_predict_link_start(_sr,nsr,_searched,end_searched,next_bias);
+    }
+    /*Bisection point found.
+      Get the final granule position of the previous link, assuming
+       op_find_initial_pcm_offset() didn't already determine the link was
+       empty.*/
+    if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+      if(end_gp==-1){
+        /*If we don't know where the end page is, we'll have to seek back and
+           look for it, starting from the end of the link.*/
+        end_offset=next;
+        /*Also forget the last page we read.
+          It won't be available after the seek.*/
+        last=-1;
+      }
+      ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+       links+nlinks-1,end_offset,links[nlinks-1].serialno,end_gp,
+       &total_duration);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    if(last!=next){
+      /*The last page we read was not the first page the next link.
+        Move the cursor position to the offset of that first page.
+        This only performs an actual seek if the first page of the next link
+         does not start at the end of the last page from the current Opus
+         stream with a valid granule position.*/
+      ret=op_seek_helper(_of,next);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    ret=op_fetch_headers(_of,&links[nlinks].head,&links[nlinks].tags,
+     _serialnos,_nserialnos,_cserialnos,last!=next?NULL:&og);
+    if(OP_UNLIKELY(ret<0))return ret;
+    links[nlinks].offset=next;
+    links[nlinks].data_offset=_of->offset;
+    links[nlinks].serialno=_of->os.serialno;
+    links[nlinks].pcm_end=-1;
+    /*This might consume a page from the next link, however the next bisection
+       always starts with a seek.*/
+    ret=op_find_initial_pcm_offset(_of,links+nlinks,NULL);
+    if(OP_UNLIKELY(ret<0))return ret;
+    _searched=_of->offset;
+    /*Mark the current link count so it can be cleaned up on error.*/
+    _of->nlinks=++nlinks;
+  }
+  /*Last page is in the starting serialno list, so we've reached the last link.
+    Now find the last granule position for it (if we didn't the first time we
+     looked at the end of the stream, and if op_find_initial_pcm_offset()
+     didn't already determine the link was empty).*/
+  if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+    ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+     links+nlinks-1,_sr[0].offset,_sr[0].serialno,_sr[0].gp,&total_duration);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  /*Trim back the links array if necessary.*/
+  links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*nlinks);
+  if(OP_LIKELY(links!=NULL))_of->links=links;
+  /*We also don't need these anymore.*/
+  _ogg_free(*_serialnos);
+  *_serialnos=NULL;
+  *_cserialnos=*_nserialnos=0;
+  return 0;
+}
+
+static void op_update_gain(OggOpusFile *_of){
+  OpusHead   *head;
+  opus_int32  gain_q8;
+  int         li;
+  /*If decode isn't ready, then we'll apply the gain when we initialize the
+     decoder.*/
+  if(_of->ready_state<OP_INITSET)return;
+  gain_q8=_of->gain_offset_q8;
+  li=_of->seekable?_of->cur_link:0;
+  head=&_of->links[li].head;
+  /*We don't have to worry about overflow here because the header gain and
+     track gain must lie in the range [-32768,32767], and the user-supplied
+     offset has been pre-clamped to [-98302,98303].*/
+  switch(_of->gain_type){
+    case OP_TRACK_GAIN:{
+      int track_gain_q8;
+      track_gain_q8=0;
+      opus_tags_get_track_gain(&_of->links[li].tags,&track_gain_q8);
+      gain_q8+=track_gain_q8;
+    }
+    /*Fall through.*/
+    case OP_HEADER_GAIN:gain_q8+=head->output_gain;break;
+    case OP_ABSOLUTE_GAIN:break;
+    default:OP_ASSERT(0);
+  }
+  gain_q8=OP_CLAMP(-32768,gain_q8,32767);
+  OP_ASSERT(_of->od!=NULL);
+#if defined(OPUS_SET_GAIN)
+  opus_multistream_decoder_ctl(_of->od,OPUS_SET_GAIN(gain_q8));
+#else
+/*A fallback that works with both float and fixed-point is a bunch of work,
+   so just force people to use a sufficiently new version.
+  This is deployed well enough at this point that this shouldn't be a burden.*/
+# error "libopus 1.0.1 or later required"
+#endif
+}
+
+static int op_make_decode_ready(OggOpusFile *_of){
+  const OpusHead *head;
+  int             li;
+  int             stream_count;
+  int             coupled_count;
+  int             channel_count;
+  if(_of->ready_state>OP_STREAMSET)return 0;
+  if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET))return OP_EFAULT;
+  li=_of->seekable?_of->cur_link:0;
+  head=&_of->links[li].head;
+  stream_count=head->stream_count;
+  coupled_count=head->coupled_count;
+  channel_count=head->channel_count;
+  /*Check to see if the current decoder is compatible with the current link.*/
+  if(_of->od!=NULL&&_of->od_stream_count==stream_count
+   &&_of->od_coupled_count==coupled_count&&_of->od_channel_count==channel_count
+   &&memcmp(_of->od_mapping,head->mapping,
+   sizeof(*head->mapping)*channel_count)==0){
+    opus_multistream_decoder_ctl(_of->od,OPUS_RESET_STATE);
+  }
+  else{
+    int err;
+    opus_multistream_decoder_destroy(_of->od);
+    _of->od=opus_multistream_decoder_create(48000,channel_count,
+     stream_count,coupled_count,head->mapping,&err);
+    if(_of->od==NULL)return OP_EFAULT;
+    _of->od_stream_count=stream_count;
+    _of->od_coupled_count=coupled_count;
+    _of->od_channel_count=channel_count;
+    memcpy(_of->od_mapping,head->mapping,sizeof(*head->mapping)*channel_count);
+  }
+  _of->ready_state=OP_INITSET;
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+#if !defined(OPUS_FIXED_POINT)
+  _of->state_channel_count=0;
+  /*Use the serial number for the PRNG seed to get repeatable output for
+     straight play-throughs.*/
+  _of->dither_seed=_of->links[li].serialno;
+#endif
+  op_update_gain(_of);
+  return 0;
+}
+
+static int op_open_seekable2_impl(OggOpusFile *_of){
+  /*64 seek records should be enough for anybody.
+    Actually, with a bisection search in a 63-bit range down to OP_CHUNK_SIZE
+     granularity, much more than enough.*/
+  OpusSeekRecord sr[64];
+  opus_int64     data_offset;
+  int            ret;
+  /*We can seek, so set out learning all about this file.*/
+  (*_of->callbacks.seek)(_of->source,0,SEEK_END);
+  _of->offset=_of->end=(*_of->callbacks.tell)(_of->source);
+  if(OP_UNLIKELY(_of->end<0))return OP_EREAD;
+  data_offset=_of->links[0].data_offset;
+  if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK;
+  /*Get the offset of the last page of the physical bitstream, or, if we're
+     lucky, the last Opus page of the first link, as most Ogg Opus files will
+     contain a single logical bitstream.*/
+  ret=op_get_prev_page_serial(_of,sr,_of->end,
+   _of->links[0].serialno,_of->serialnos,_of->nserialnos);
+  if(OP_UNLIKELY(ret<0))return ret;
+  /*If there's any trailing junk, forget about it.*/
+  _of->end=sr[0].offset+sr[0].size;
+  if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK;
+  /*Now enumerate the bitstream structure.*/
+  return op_bisect_forward_serialno(_of,data_offset,sr,sizeof(sr)/sizeof(*sr),
+   &_of->serialnos,&_of->nserialnos,&_of->cserialnos);
+}
+
+static int op_open_seekable2(OggOpusFile *_of){
+  ogg_sync_state    oy_start;
+  ogg_stream_state  os_start;
+  ogg_packet       *op_start;
+  opus_int64        start_offset;
+  int               start_op_count;
+  int               ret;
+  /*We're partially open and have a first link header state in storage in _of.
+    Save off that stream state so we can come back to it.
+    It would be simpler to just dump all this state and seek back to
+     links[0].data_offset when we're done.
+    But we do the extra work to allow us to seek back to _exactly_ the same
+     stream position we're at now.
+    This allows, e.g., the HTTP backend to continue reading from the original
+     connection (if it's still available), instead of opening a new one.
+    This means we can open and start playing a normal Opus file with a single
+     link and reasonable packet sizes using only two HTTP requests.*/
+  start_op_count=_of->op_count;
+  /*This is a bit too large to put on the stack unconditionally.*/
+  op_start=(ogg_packet *)_ogg_malloc(sizeof(*op_start)*start_op_count);
+  if(op_start==NULL)return OP_EFAULT;
+  *&oy_start=_of->oy;
+  *&os_start=_of->os;
+  start_offset=_of->offset;
+  memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count);
+  OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of));
+  ogg_sync_init(&_of->oy);
+  ogg_stream_init(&_of->os,-1);
+  ret=op_open_seekable2_impl(_of);
+  /*Restore the old stream state.*/
+  ogg_stream_clear(&_of->os);
+  ogg_sync_clear(&_of->oy);
+  *&_of->oy=*&oy_start;
+  *&_of->os=*&os_start;
+  _of->offset=start_offset;
+  _of->op_count=start_op_count;
+  memcpy(_of->op,op_start,sizeof(*_of->op)*start_op_count);
+  _ogg_free(op_start);
+  _of->prev_packet_gp=_of->links[0].pcm_start;
+  _of->cur_discard_count=_of->links[0].head.pre_skip;
+  if(OP_UNLIKELY(ret<0))return ret;
+  /*And restore the position indicator.*/
+  ret=(*_of->callbacks.seek)(_of->source,op_position(_of),SEEK_SET);
+  return OP_UNLIKELY(ret<0)?OP_EREAD:0;
+}
+
+/*Clear out the current logical bitstream decoder.*/
+static void op_decode_clear(OggOpusFile *_of){
+  /*We don't actually free the decoder.
+    We might be able to re-use it for the next link.*/
+  _of->op_count=0;
+  _of->od_buffer_size=0;
+  _of->prev_packet_gp=-1;
+  if(!_of->seekable){
+    OP_ASSERT(_of->ready_state>=OP_INITSET);
+    opus_tags_clear(&_of->links[0].tags);
+  }
+  _of->ready_state=OP_OPENED;
+}
+
+static void op_clear(OggOpusFile *_of){
+  OggOpusLink *links;
+  _ogg_free(_of->od_buffer);
+  if(_of->od!=NULL)opus_multistream_decoder_destroy(_of->od);
+  links=_of->links;
+  if(!_of->seekable){
+    if(_of->ready_state>OP_OPENED||_of->ready_state==OP_PARTOPEN){
+      opus_tags_clear(&links[0].tags);
+    }
+  }
+  else if(OP_LIKELY(links!=NULL)){
+    int nlinks;
+    int link;
+    nlinks=_of->nlinks;
+    for(link=0;link<nlinks;link++)opus_tags_clear(&links[link].tags);
+  }
+  _ogg_free(links);
+  _ogg_free(_of->serialnos);
+  ogg_stream_clear(&_of->os);
+  ogg_sync_clear(&_of->oy);
+  if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->source);
+}
+
+static int op_open1(OggOpusFile *_of,
+ void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes){
+  ogg_page  og;
+  ogg_page *pog;
+  int       seekable;
+  int       ret;
+  memset(_of,0,sizeof(*_of));
+  _of->end=-1;
+  _of->source=_source;
+  *&_of->callbacks=*_cb;
+  /*At a minimum, we need to be able to read data.*/
+  if(OP_UNLIKELY(_of->callbacks.read==NULL))return OP_EREAD;
+  /*Initialize the framing state.*/
+  ogg_sync_init(&_of->oy);
+  /*Perhaps some data was previously read into a buffer for testing against
+     other stream types.
+    Allow initialization from this previously read data (especially as we may
+     be reading from a non-seekable stream).
+    This requires copying it into a buffer allocated by ogg_sync_buffer() and
+     doesn't support seeking, so this is not a good mechanism to use for
+     decoding entire files from RAM.*/
+  if(_initial_bytes>0){
+    char *buffer;
+    buffer=ogg_sync_buffer(&_of->oy,_initial_bytes);
+    memcpy(buffer,_initial_data,_initial_bytes*sizeof(*buffer));
+    ogg_sync_wrote(&_of->oy,_initial_bytes);
+  }
+  /*Can we seek?
+    Stevens suggests the seek test is portable.*/
+  seekable=_cb->seek!=NULL&&(*_cb->seek)(_source,0,SEEK_CUR)!=-1;
+  /*If seek is implemented, tell must also be implemented.*/
+  if(seekable){
+    opus_int64 pos;
+    if(OP_UNLIKELY(_of->callbacks.tell==NULL))return OP_EINVAL;
+    pos=(*_of->callbacks.tell)(_of->source);
+    /*If the current position is not equal to the initial bytes consumed,
+       absolute seeking will not work.*/
+    if(OP_UNLIKELY(pos!=(opus_int64)_initial_bytes))return OP_EINVAL;
+  }
+  _of->seekable=seekable;
+  /*Don't seek yet.
+    Set up a 'single' (current) logical bitstream entry for partial open.*/
+  _of->links=(OggOpusLink *)_ogg_malloc(sizeof(*_of->links));
+  /*The serialno gets filled in later by op_fetch_headers().*/
+  ogg_stream_init(&_of->os,-1);
+  pog=NULL;
+  for(;;){
+    /*Fetch all BOS pages, store the Opus header and all seen serial numbers,
+      and load subsequent Opus setup headers.*/
+    ret=op_fetch_headers(_of,&_of->links[0].head,&_of->links[0].tags,
+     &_of->serialnos,&_of->nserialnos,&_of->cserialnos,pog);
+    if(OP_UNLIKELY(ret<0))break;
+    _of->nlinks=1;
+    _of->links[0].offset=0;
+    _of->links[0].data_offset=_of->offset;
+    _of->links[0].pcm_end=-1;
+    _of->links[0].serialno=_of->os.serialno;
+    /*Fetch the initial PCM offset.*/
+    ret=op_find_initial_pcm_offset(_of,_of->links,&og);
+    if(seekable||OP_LIKELY(ret<=0))break;
+    /*This link was empty, but we already have the BOS page for the next one in
+       og.
+      We can't seek, so start processing the next link right now.*/
+    opus_tags_clear(&_of->links[0].tags);
+    _of->nlinks=0;
+    if(!seekable)_of->cur_link++;
+    pog=&og;
+  }
+  if(OP_LIKELY(ret>=0))_of->ready_state=OP_PARTOPEN;
+  return ret;
+}
+
+static int op_open2(OggOpusFile *_of){
+  int ret;
+  OP_ASSERT(_of->ready_state==OP_PARTOPEN);
+  if(_of->seekable){
+    _of->ready_state=OP_OPENED;
+    ret=op_open_seekable2(_of);
+  }
+  else ret=0;
+  if(OP_LIKELY(ret>=0)){
+    /*We have buffered packets from op_find_initial_pcm_offset().
+      Move to OP_INITSET so we can use them.*/
+    _of->ready_state=OP_STREAMSET;
+    ret=op_make_decode_ready(_of);
+    if(OP_LIKELY(ret>=0))return 0;
+  }
+  /*Don't auto-close the stream on failure.*/
+  _of->callbacks.close=NULL;
+  op_clear(_of);
+  return ret;
+}
+
+OggOpusFile *op_test_callbacks(void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes,int *_error){
+  OggOpusFile *of;
+  int          ret;
+  of=(OggOpusFile *)_ogg_malloc(sizeof(*of));
+  ret=OP_EFAULT;
+  if(OP_LIKELY(of!=NULL)){
+    ret=op_open1(of,_source,_cb,_initial_data,_initial_bytes);
+    if(OP_LIKELY(ret>=0)){
+      if(_error!=NULL)*_error=0;
+      return of;
+    }
+    /*Don't auto-close the stream on failure.*/
+    of->callbacks.close=NULL;
+    op_clear(of);
+    _ogg_free(of);
+  }
+  if(_error!=NULL)*_error=ret;
+  return NULL;
+}
+
+OggOpusFile *op_open_callbacks(void *_source,const OpusFileCallbacks *_cb,
+ const unsigned char *_initial_data,size_t _initial_bytes,int *_error){
+  OggOpusFile *of;
+  of=op_test_callbacks(_source,_cb,_initial_data,_initial_bytes,_error);
+  if(OP_LIKELY(of!=NULL)){
+    int ret;
+    ret=op_open2(of);
+    if(OP_LIKELY(ret>=0))return of;
+    if(_error!=NULL)*_error=ret;
+    _ogg_free(of);
+  }
+  return NULL;
+}
+
+/*Convenience routine to clean up from failure for the open functions that
+   create their own streams.*/
+static OggOpusFile *op_open_close_on_failure(void *_source,
+ const OpusFileCallbacks *_cb,int *_error){
+  OggOpusFile *of;
+  if(OP_UNLIKELY(_source==NULL)){
+    if(_error!=NULL)*_error=OP_EFAULT;
+    return NULL;
+  }
+  of=op_open_callbacks(_source,_cb,NULL,0,_error);
+  if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source);
+  return of;
+}
+
+OggOpusFile *op_open_file(const char *_path,int *_error){
+  OpusFileCallbacks cb;
+  return op_open_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error);
+}
+
+OggOpusFile *op_open_memory(const unsigned char *_data,size_t _size,
+ int *_error){
+  OpusFileCallbacks cb;
+  return op_open_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb,
+   _error);
+}
+
+/*Convenience routine to clean up from failure for the open functions that
+   create their own streams.*/
+static OggOpusFile *op_test_close_on_failure(void *_source,
+ const OpusFileCallbacks *_cb,int *_error){
+  OggOpusFile *of;
+  if(OP_UNLIKELY(_source==NULL)){
+    if(_error!=NULL)*_error=OP_EFAULT;
+    return NULL;
+  }
+  of=op_test_callbacks(_source,_cb,NULL,0,_error);
+  if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source);
+  return of;
+}
+
+OggOpusFile *op_test_file(const char *_path,int *_error){
+  OpusFileCallbacks cb;
+  return op_test_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error);
+}
+
+OggOpusFile *op_test_memory(const unsigned char *_data,size_t _size,
+ int *_error){
+  OpusFileCallbacks cb;
+  return op_test_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb,
+   _error);
+}
+
+int op_test_open(OggOpusFile *_of){
+  int ret;
+  if(OP_UNLIKELY(_of->ready_state!=OP_PARTOPEN))return OP_EINVAL;
+  ret=op_open2(_of);
+  /*op_open2() will clear this structure on failure.
+    Reset its contents to prevent double-frees in op_free().*/
+  if(OP_UNLIKELY(ret<0))memset(_of,0,sizeof(*_of));
+  return ret;
+}
+
+void op_free(OggOpusFile *_of){
+  if(OP_LIKELY(_of!=NULL)){
+    op_clear(_of);
+    _ogg_free(_of);
+  }
+}
+
+int op_seekable(const OggOpusFile *_of){
+  return _of->seekable;
+}
+
+int op_link_count(const OggOpusFile *_of){
+  return _of->nlinks;
+}
+
+ogg_uint32_t op_serialno(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1;
+  if(!_of->seekable)_li=0;
+  return _of->links[_li<0?_of->cur_link:_li].serialno;
+}
+
+int op_channel_count(const OggOpusFile *_of,int _li){
+  return op_head(_of,_li)->channel_count;
+}
+
+opus_int64 op_raw_total(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED)
+   ||OP_UNLIKELY(!_of->seekable)
+   ||OP_UNLIKELY(_li>=_of->nlinks)){
+    return OP_EINVAL;
+  }
+  if(_li<0)return _of->end-_of->links[0].offset;
+  return (_li+1>=_of->nlinks?_of->end:_of->links[_li+1].offset)
+   -_of->links[_li].offset;
+}
+
+ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){
+  OggOpusLink *links;
+  ogg_int64_t  diff;
+  int          nlinks;
+  nlinks=_of->nlinks;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED)
+   ||OP_UNLIKELY(!_of->seekable)
+   ||OP_UNLIKELY(_li>=nlinks)){
+    return OP_EINVAL;
+  }
+  links=_of->links;
+  /*We verify that the granule position differences are larger than the
+     pre-skip and that the total duration does not overflow during link
+     enumeration, so we don't have to check here.*/
+  if(_li<0){
+    ogg_int64_t pcm_total;
+    int         li;
+    pcm_total=0;
+    for(li=0;li<nlinks;li++){
+      OP_ALWAYS_TRUE(!op_granpos_diff(&diff,
+       links[li].pcm_end,links[li].pcm_start));
+      pcm_total+=diff-links[li].head.pre_skip;
+    }
+    return pcm_total;
+  }
+  OP_ALWAYS_TRUE(!op_granpos_diff(&diff,
+   links[_li].pcm_end,links[_li].pcm_start));
+  return diff-links[_li].head.pre_skip;
+}
+
+const OpusHead *op_head(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1;
+  if(!_of->seekable)_li=0;
+  return &_of->links[_li<0?_of->cur_link:_li].head;
+}
+
+const OpusTags *op_tags(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1;
+  if(!_of->seekable){
+    if(_of->ready_state<OP_STREAMSET&&_of->ready_state!=OP_PARTOPEN){
+      return NULL;
+    }
+    _li=0;
+  }
+  else if(_li<0)_li=_of->ready_state>=OP_STREAMSET?_of->cur_link:0;
+  return &_of->links[_li].tags;
+}
+
+int op_current_link(const OggOpusFile *_of){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  return _of->cur_link;
+}
+
+/*Compute an average bitrate given a byte and sample count.
+  Return: The bitrate in bits per second.*/
+static opus_int32 op_calc_bitrate(opus_int64 _bytes,ogg_int64_t _samples){
+  /*These rates are absurd, but let's handle them anyway.*/
+  if(OP_UNLIKELY(_bytes>(OP_INT64_MAX-(_samples>>1))/(48000*8))){
+    ogg_int64_t den;
+    if(OP_UNLIKELY(_bytes/(OP_INT32_MAX/(48000*8))>=_samples)){
+      return OP_INT32_MAX;
+    }
+    den=_samples/(48000*8);
+    return (opus_int32)((_bytes+(den>>1))/den);
+  }
+  if(OP_UNLIKELY(_samples<=0))return OP_INT32_MAX;
+  /*This can't actually overflow in normal operation: even with a pre-skip of
+     545 2.5 ms frames with 8 streams running at 1282*8+1 bytes per packet
+     (1275 byte frames + Opus framing overhead + Ogg lacing values), that all
+     produce a single sample of decoded output, we still don't top 45 Mbps.
+    The only way to get bitrates larger than that is with excessive Opus
+     padding, more encoded streams than output channels, or lots and lots of
+     Ogg pages with no packets on them.*/
+  return (opus_int32)OP_MIN((_bytes*48000*8+(_samples>>1))/_samples,
+   OP_INT32_MAX);
+}
+
+opus_int32 op_bitrate(const OggOpusFile *_of,int _li){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED)||OP_UNLIKELY(!_of->seekable)
+   ||OP_UNLIKELY(_li>=_of->nlinks)){
+    return OP_EINVAL;
+  }
+  return op_calc_bitrate(op_raw_total(_of,_li),op_pcm_total(_of,_li));
+}
+
+opus_int32 op_bitrate_instant(OggOpusFile *_of){
+  ogg_int64_t samples_tracked;
+  opus_int32  ret;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  samples_tracked=_of->samples_tracked;
+  if(OP_UNLIKELY(samples_tracked==0))return OP_FALSE;
+  ret=op_calc_bitrate(_of->bytes_tracked,samples_tracked);
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+  return ret;
+}
+
+/*Fetch and process a page.
+  This handles the case where we're at a bitstream boundary and dumps the
+   decoding machine.
+  If the decoding machine is unloaded, it loads it.
+  It also keeps prev_packet_gp up to date (seek and read both use this; seek
+   uses a special hack with _readp).
+  Return: <0) Error, OP_HOLE (lost packet), or OP_EOF.
+           0) Need more data (only if _readp==0).
+           1) Got at least one audio data packet.*/
+static int op_fetch_and_process_page(OggOpusFile *_of,
+ ogg_page *_og,opus_int64 _page_pos,int _readp,int _spanp,int _ignore_holes){
+  OggOpusLink  *links;
+  ogg_uint32_t  cur_serialno;
+  int           seekable;
+  int           cur_link;
+  int           ret;
+  /*We shouldn't get here if we have unprocessed packets.*/
+  OP_ASSERT(_of->ready_state<OP_INITSET||_of->op_pos>=_of->op_count);
+  if(!_readp)return 0;
+  seekable=_of->seekable;
+  links=_of->links;
+  cur_link=seekable?_of->cur_link:0;
+  cur_serialno=links[cur_link].serialno;
+  /*Handle one page.*/
+  for(;;){
+    ogg_page og;
+    OP_ASSERT(_of->ready_state>=OP_OPENED);
+    /*This loop is not strictly necessary, but there's no sense in doing the
+       extra checks of the larger loop for the common case in a multiplexed
+       bistream where the page is simply part of a different logical
+       bitstream.*/
+    do{
+      /*If we were given a page to use, use it.*/
+      if(_og!=NULL){
+        *&og=*_og;
+        _og=NULL;
+      }
+      /*Keep reading until we get a page with the correct serialno.*/
+      else _page_pos=op_get_next_page(_of,&og,_of->end);
+      /*EOF: Leave uninitialized.*/
+      if(_page_pos<0)return _page_pos<OP_FALSE?(int)_page_pos:OP_EOF;
+      if(OP_LIKELY(_of->ready_state>=OP_STREAMSET)){
+        if(cur_serialno!=(ogg_uint32_t)ogg_page_serialno(&og)){
+          /*Two possibilities:
+             1) Another stream is multiplexed into this logical section, or*/
+          if(OP_LIKELY(!ogg_page_bos(&og)))continue;
+          /* 2) Our decoding just traversed a bitstream boundary.*/
+          if(!_spanp)return OP_EOF;
+          if(OP_LIKELY(_of->ready_state>=OP_INITSET))op_decode_clear(_of);
+          break;
+        }
+      }
+      /*Bitrate tracking: add the header's bytes here.
+        The body bytes are counted when we consume the packets.*/
+      _of->bytes_tracked+=og.header_len;
+    }
+    while(0);
+    /*Do we need to load a new machine before submitting the page?
+      This is different in the seekable and non-seekable cases.
+      In the seekable case, we already have all the header information loaded
+       and cached.
+      We just initialize the machine with it and continue on our merry way.
+      In the non-seekable (streaming) case, we'll only be at a boundary if we
+       just left the previous logical bitstream, and we're now nominally at the
+       header of the next bitstream.*/
+    if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET)){
+      if(seekable){
+        ogg_uint32_t serialno;
+        int          nlinks;
+        int          li;
+        serialno=ogg_page_serialno(&og);
+        /*Match the serialno to bitstream section.
+          We use this rather than offset positions to avoid problems near
+           logical bitstream boundaries.*/
+        nlinks=_of->nlinks;
+        for(li=0;li<nlinks&&links[li].serialno!=serialno;li++);
+        /*Not a desired Opus bitstream section.
+          Keep trying.*/
+        if(li>=nlinks)continue;
+        cur_serialno=serialno;
+        _of->cur_link=cur_link=li;
+        ogg_stream_reset_serialno(&_of->os,serialno);
+        _of->ready_state=OP_STREAMSET;
+        /*If we're at the start of this link, initialize the granule position
+           and pre-skip tracking.*/
+        if(_page_pos<=links[cur_link].data_offset){
+          _of->prev_packet_gp=links[cur_link].pcm_start;
+          _of->cur_discard_count=links[cur_link].head.pre_skip;
+          /*Ignore a hole at the start of a new link (this is common for
+             streams joined in the middle) or after seeking.*/
+          _ignore_holes=1;
+        }
+      }
+      else{
+        do{
+          /*We're streaming.
+            Fetch the two header packets, build the info struct.*/
+          ret=op_fetch_headers(_of,&links[0].head,&links[0].tags,
+           NULL,NULL,NULL,&og);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*op_find_initial_pcm_offset() will suppress any initial hole for us,
+             so no need to set _ignore_holes.*/
+          ret=op_find_initial_pcm_offset(_of,links,&og);
+          if(OP_UNLIKELY(ret<0))return ret;
+          _of->links[0].serialno=cur_serialno=_of->os.serialno;
+          _of->cur_link++;
+        }
+        /*If the link was empty, keep going, because we already have the
+           BOS page of the next one in og.*/
+        while(OP_UNLIKELY(ret>0));
+        /*If we didn't get any packets out of op_find_initial_pcm_offset(),
+           keep going (this is possible if end-trimming trimmed them all).*/
+        if(_of->op_count<=0)continue;
+        /*Otherwise, we're done.*/
+        ret=op_make_decode_ready(_of);
+        if(OP_UNLIKELY(ret<0))return ret;
+        return 1;
+      }
+    }
+    /*The buffered page is the data we want, and we're ready for it.
+      Add it to the stream state.*/
+    if(OP_UNLIKELY(_of->ready_state==OP_STREAMSET)){
+      ret=op_make_decode_ready(_of);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    /*Extract all the packets from the current page.*/
+    ogg_stream_pagein(&_of->os,&og);
+    if(OP_LIKELY(_of->ready_state>=OP_INITSET)){
+      opus_int32 total_duration;
+      int        durations[255];
+      int        op_count;
+      total_duration=op_collect_audio_packets(_of,durations);
+      if(OP_UNLIKELY(total_duration<0)){
+        /*Drain the packets from the page anyway.*/
+        total_duration=op_collect_audio_packets(_of,durations);
+        OP_ASSERT(total_duration>=0);
+        /*Report holes to the caller.*/
+        if(!_ignore_holes)return OP_HOLE;
+      }
+      op_count=_of->op_count;
+      /*If we found at least one audio data packet, compute per-packet granule
+         positions for them.*/
+      if(op_count>0){
+        ogg_int64_t diff;
+        ogg_int64_t prev_packet_gp;
+        ogg_int64_t cur_packet_gp;
+        ogg_int64_t cur_page_gp;
+        int         cur_page_eos;
+        int         pi;
+        cur_page_gp=_of->op[op_count-1].granulepos;
+        cur_page_eos=_of->op[op_count-1].e_o_s;
+        prev_packet_gp=_of->prev_packet_gp;
+        if(OP_UNLIKELY(prev_packet_gp==-1)){
+          opus_int32 cur_discard_count;
+          /*This is the first call after a raw seek.
+            Try to reconstruct prev_packet_gp from scratch.*/
+          OP_ASSERT(seekable);
+          if(OP_UNLIKELY(cur_page_eos)){
+            /*If the first page we hit after our seek was the EOS page, and
+               we didn't start from data_offset or before, we don't have
+               enough information to do end-trimming.
+              Proceed to the next link, rather than risk playing back some
+               samples that shouldn't have been played.*/
+            _of->op_count=0;
+            continue;
+          }
+          /*By default discard 80 ms of data after a seek, unless we seek
+             into the pre-skip region.*/
+          cur_discard_count=80*48;
+          cur_page_gp=_of->op[op_count-1].granulepos;
+          /*Try to initialize prev_packet_gp.
+            If the current page had packets but didn't have a granule
+             position, or the granule position it had was too small (both
+             illegal), just use the starting granule position for the link.*/
+          prev_packet_gp=links[cur_link].pcm_start;
+          if(OP_LIKELY(cur_page_gp!=-1)){
+            op_granpos_add(&prev_packet_gp,cur_page_gp,-total_duration);
+          }
+          if(OP_LIKELY(!op_granpos_diff(&diff,
+           prev_packet_gp,links[cur_link].pcm_start))){
+            opus_int32 pre_skip;
+            /*If we start at the beginning of the pre-skip region, or we're
+               at least 80 ms from the end of the pre-skip region, we discard
+               to the end of the pre-skip region.
+              Otherwise, we still use the 80 ms default, which will discard
+               past the end of the pre-skip region.*/
+            pre_skip=links[cur_link].head.pre_skip;
+            if(diff>=0&&diff<=OP_MAX(0,pre_skip-80*48)){
+              cur_discard_count=pre_skip-(int)diff;
+            }
+          }
+          _of->cur_discard_count=cur_discard_count;
+        }
+        if(OP_UNLIKELY(cur_page_gp==-1)){
+          /*This page had completed packets but didn't have a valid granule
+             position.
+            This is illegal, but we'll try to handle it by continuing to count
+             forwards from the previous page.*/
+          if(op_granpos_add(&cur_page_gp,prev_packet_gp,total_duration)<0){
+            /*The timestamp for this page overflowed.*/
+            cur_page_gp=links[cur_link].pcm_end;
+          }
+        }
+        /*If we hit the last page, handle end-trimming.*/
+        if(OP_UNLIKELY(cur_page_eos)
+         &&OP_LIKELY(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp))
+         &&OP_LIKELY(diff<total_duration)){
+          cur_packet_gp=prev_packet_gp;
+          for(pi=0;pi<op_count;pi++){
+            diff=durations[pi]-diff;
+            /*If we have samples to trim...*/
+            if(diff>0){
+              /*If we trimmed the entire packet, stop (the spec says encoders
+                 shouldn't do this, but we support it anyway).*/
+              if(OP_UNLIKELY(diff>durations[pi]))break;
+              cur_packet_gp=cur_page_gp;
+              /*Move the EOS flag to this packet, if necessary, so we'll trim
+                 the samples during decode.*/
+              _of->op[pi].e_o_s=1;
+            }
+            else{
+              /*Update the granule position as normal.*/
+              OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp,
+               cur_packet_gp,durations[pi]));
+            }
+            _of->op[pi].granulepos=cur_packet_gp;
+            OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,cur_packet_gp));
+          }
+        }
+        else{
+          /*Propagate timestamps to earlier packets.
+            op_granpos_add(&prev_packet_gp,prev_packet_gp,total_duration)
+             should succeed and give prev_packet_gp==cur_page_gp.
+            But we don't bother to check that, as there isn't much we can do
+             if it's not true.
+            The only thing we guarantee is that the start and end granule
+             positions of the packets are valid, and that they are monotonic
+             within a page.
+            They might be completely out of range for this link (we'll check
+             that elsewhere), or non-monotonic between pages.*/
+          if(OP_UNLIKELY(op_granpos_add(&prev_packet_gp,
+           cur_page_gp,-total_duration)<0)){
+            /*The starting timestamp for the first packet on this page
+               underflowed.
+              This is illegal, but we ignore it.*/
+            prev_packet_gp=0;
+          }
+          for(pi=0;pi<op_count;pi++){
+            if(OP_UNLIKELY(op_granpos_add(&cur_packet_gp,
+             cur_page_gp,-total_duration)<0)){
+              /*The start timestamp for this packet underflowed.
+                This is illegal, but we ignore it.*/
+              cur_packet_gp=0;
+            }
+            total_duration-=durations[pi];
+            OP_ASSERT(total_duration>=0);
+            OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp,
+             cur_packet_gp,durations[pi]));
+            _of->op[pi].granulepos=cur_packet_gp;
+          }
+          OP_ASSERT(total_duration==0);
+        }
+        _of->prev_packet_gp=prev_packet_gp;
+        _of->op_count=pi;
+        /*If end-trimming didn't trim all the packets, we're done.*/
+        if(OP_LIKELY(pi>0))return 1;
+      }
+    }
+  }
+}
+
+int op_raw_seek(OggOpusFile *_of,opus_int64 _pos){
+  int ret;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  /*Don't dump the decoder state if we can't seek.*/
+  if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK;
+  if(OP_UNLIKELY(_pos<0)||OP_UNLIKELY(_pos>_of->end))return OP_EINVAL;
+  /*Clear out any buffered, decoded data.*/
+  op_decode_clear(_of);
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+  ret=op_seek_helper(_of,_pos);
+  if(OP_UNLIKELY(ret<0))return OP_EREAD;
+  ret=op_fetch_and_process_page(_of,NULL,-1,1,1,1);
+  /*If we hit EOF, op_fetch_and_process_page() leaves us uninitialized.
+    Instead, jump to the end.*/
+  if(ret==OP_EOF){
+    int cur_link;
+    op_decode_clear(_of);
+    cur_link=_of->nlinks-1;
+    _of->cur_link=cur_link;
+    _of->prev_packet_gp=_of->links[cur_link].pcm_end;
+    _of->cur_discard_count=0;
+    ret=0;
+  }
+  else if(ret>0)ret=0;
+  return ret;
+}
+
+/*Convert a PCM offset relative to the start of the whole stream to a granule
+   position in an individual link.*/
+static ogg_int64_t op_get_granulepos(const OggOpusFile *_of,
+ ogg_int64_t _pcm_offset,int *_li){
+  const OggOpusLink *links;
+  ogg_int64_t        duration;
+  int                nlinks;
+  int                li;
+  OP_ASSERT(_pcm_offset>=0);
+  nlinks=_of->nlinks;
+  links=_of->links;
+  for(li=0;OP_LIKELY(li<nlinks);li++){
+    ogg_int64_t pcm_start;
+    opus_int32  pre_skip;
+    pcm_start=links[li].pcm_start;
+    pre_skip=links[li].head.pre_skip;
+    OP_ALWAYS_TRUE(!op_granpos_diff(&duration,links[li].pcm_end,pcm_start));
+    duration-=pre_skip;
+    if(_pcm_offset<duration){
+      _pcm_offset+=pre_skip;
+      if(OP_UNLIKELY(pcm_start>OP_INT64_MAX-_pcm_offset)){
+        /*Adding this amount to the granule position would overflow the positive
+           half of its 64-bit range.
+          Since signed overflow is undefined in C, do it in a way the compiler
+           isn't allowed to screw up.*/
+        _pcm_offset-=OP_INT64_MAX-pcm_start+1;
+        pcm_start=OP_INT64_MIN;
+      }
+      pcm_start+=_pcm_offset;
+      *_li=li;
+      return pcm_start;
+    }
+    _pcm_offset-=duration;
+  }
+  return -1;
+}
+
+/*This controls how close the target has to be to use the current stream
+   position to subdivide the initial range.
+  Two minutes seems to be a good default.*/
+#define OP_CUR_TIME_THRESH (120*48*(opus_int32)1000)
+
+/*Note: The OP_SMALL_FOOTPRINT #define doesn't (currently) save much code size,
+   but it's meant to serve as documentation for portions of the seeking
+   algorithm that are purely optional, to aid others learning from/porting this
+   code to other contexts.*/
+/*#define OP_SMALL_FOOTPRINT (1)*/
+
+/*Search within link _li for the page with the highest granule position
+   preceding (or equal to) _target_gp.
+  There is a danger here: missing pages or incorrect frame number information
+   in the bitstream could make our task impossible.
+  Account for that (and report it as an error condition).*/
+static int op_pcm_seek_page(OggOpusFile *_of,
+ ogg_int64_t _target_gp,int _li){
+  const OggOpusLink *link;
+  ogg_page           og;
+  ogg_int64_t        pcm_pre_skip;
+  ogg_int64_t        pcm_start;
+  ogg_int64_t        pcm_end;
+  ogg_int64_t        best_gp;
+  ogg_int64_t        diff;
+  ogg_uint32_t       serialno;
+  opus_int32         pre_skip;
+  opus_int64         begin;
+  opus_int64         end;
+  opus_int64         boundary;
+  opus_int64         best;
+  opus_int64         page_offset;
+  opus_int64         d0;
+  opus_int64         d1;
+  opus_int64         d2;
+  int                force_bisect;
+  int                ret;
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+  link=_of->links+_li;
+  best_gp=pcm_start=link->pcm_start;
+  pcm_end=link->pcm_end;
+  serialno=link->serialno;
+  best=begin=link->data_offset;
+  page_offset=-1;
+  /*We discard the first 80 ms of data after a seek, so seek back that much
+     farther.
+    If we can't, simply seek to the beginning of the link.*/
+  if(OP_UNLIKELY(op_granpos_add(&_target_gp,_target_gp,-80*48)<0)
+   ||OP_UNLIKELY(op_granpos_cmp(_target_gp,pcm_start)<0)){
+    _target_gp=pcm_start;
+  }
+  /*Special case seeking to the start of the link.*/
+  pre_skip=link->head.pre_skip;
+  OP_ALWAYS_TRUE(!op_granpos_add(&pcm_pre_skip,pcm_start,pre_skip));
+  if(op_granpos_cmp(_target_gp,pcm_pre_skip)<0)end=boundary=begin;
+  else{
+    end=boundary=link->end_offset;
+#if !defined(OP_SMALL_FOOTPRINT)
+    /*If we were decoding from this link, we can narrow the range a bit.*/
+    if(_li==_of->cur_link&&_of->ready_state>=OP_INITSET){
+      opus_int64 offset;
+      int        op_count;
+      op_count=_of->op_count;
+      /*The only way the offset can be invalid _and_ we can fail the granule
+         position checks below is if someone changed the contents of the last
+         page since we read it.
+        We'd be within our rights to just return OP_EBADLINK in that case, but
+         we'll simply ignore the current position instead.*/
+      offset=_of->offset;
+      if(op_count>0&&OP_LIKELY(offset<=end)){
+        ogg_int64_t gp;
+        /*Make sure the timestamp is valid.
+          The granule position might be -1 if we collected the packets from a
+           page without a granule position after reporting a hole.*/
+        gp=_of->op[op_count-1].granulepos;
+        if(OP_LIKELY(gp!=-1)&&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<0)
+         &&OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)){
+          OP_ALWAYS_TRUE(!op_granpos_diff(&diff,gp,_target_gp));
+          /*We only actually use the current time if either
+            a) We can cut off at least half the range, or
+            b) We're seeking sufficiently close to the current position that
+                it's likely to be informative.
+            Otherwise it appears using the whole link range to estimate the
+             first seek location gives better results, on average.*/
+          if(diff<0){
+            OP_ASSERT(offset>=begin);
+            if(offset-begin>=end-begin>>1||diff>-OP_CUR_TIME_THRESH){
+              best=begin=offset;
+              best_gp=pcm_start=gp;
+            }
+          }
+          else{
+            ogg_int64_t prev_page_gp;
+            /*We might get lucky and already have the packet with the target
+               buffered.
+              Worth checking.
+              For very small files (with all of the data in a single page,
+               generally 1 second or less), we can loop them continuously
+               without seeking at all.*/
+            OP_ALWAYS_TRUE(!op_granpos_add(&prev_page_gp,_of->op[0].granulepos,
+             op_get_packet_duration(_of->op[0].packet,_of->op[0].bytes)));
+            if(op_granpos_cmp(prev_page_gp,_target_gp)<=0){
+              /*Don't call op_decode_clear(), because it will dump our
+                 packets.*/
+              _of->op_pos=0;
+              _of->od_buffer_size=0;
+              _of->prev_packet_gp=prev_page_gp;
+              _of->ready_state=OP_STREAMSET;
+              return op_make_decode_ready(_of);
+            }
+            /*No such luck.
+              Check if we can cut off at least half the range, though.*/
+            if(offset-begin<=end-begin>>1||diff<OP_CUR_TIME_THRESH){
+              /*We really want the page start here, but this will do.*/
+              end=boundary=offset;
+              pcm_end=gp;
+            }
+          }
+        }
+      }
+    }
+#endif
+  }
+  /*This code was originally based on the "new search algorithm by HB (Nicholas
+     Vinen)" from libvorbisfile.
+    It has been modified substantially since.*/
+  op_decode_clear(_of);
+  /*Initialize the interval size history.*/
+  d2=d1=d0=end-begin;
+  force_bisect=0;
+  while(begin<end){
+    opus_int64 bisect;
+    opus_int64 next_boundary;
+    opus_int32 chunk_size;
+    if(end-begin<OP_CHUNK_SIZE)bisect=begin;
+    else{
+      /*Update the interval size history.*/
+      d0=d1>>1;
+      d1=d2>>1;
+      d2=end-begin>>1;
+      if(force_bisect)bisect=begin+(end-begin>>1);
+      else{
+        ogg_int64_t diff2;
+        OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start));
+        OP_ALWAYS_TRUE(!op_granpos_diff(&diff2,pcm_end,pcm_start));
+        /*Take a (pretty decent) guess.*/
+        bisect=begin+op_rescale64(diff,diff2,end-begin)-OP_CHUNK_SIZE;
+      }
+      if(bisect-OP_CHUNK_SIZE<begin)bisect=begin;
+      force_bisect=0;
+    }
+    if(bisect!=_of->offset){
+      page_offset=-1;
+      ret=op_seek_helper(_of,bisect);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    chunk_size=OP_CHUNK_SIZE;
+    next_boundary=boundary;
+    while(begin<end){
+      page_offset=op_get_next_page(_of,&og,boundary);
+      if(page_offset<0){
+        if(page_offset<OP_FALSE)return (int)page_offset;
+        /*There are no more pages in our interval from our stream with a valid
+           timestamp that start at position bisect or later.*/
+        /*If we scanned the whole interval, we're done.*/
+        if(bisect<=begin+1)end=begin;
+        else{
+          /*Otherwise, back up one chunk.*/
+          bisect=OP_MAX(bisect-chunk_size,begin);
+          ret=op_seek_helper(_of,bisect);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*Bump up the chunk size.*/
+          chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+          /*If we did find a page from another stream or without a timestamp,
+             don't read past it.*/
+          boundary=next_boundary;
+        }
+      }
+      else{
+        ogg_int64_t gp;
+        /*Save the offset of the first page we found after the seek, regardless
+           of the stream it came from or whether or not it has a timestamp.*/
+        next_boundary=OP_MIN(page_offset,next_boundary);
+        if(serialno!=(ogg_uint32_t)ogg_page_serialno(&og))continue;
+        gp=ogg_page_granulepos(&og);
+        if(gp==-1)continue;
+        if(op_granpos_cmp(gp,_target_gp)<0){
+          /*We found a page that ends before our target.
+            Advance to the raw offset of the next page.*/
+          begin=_of->offset;
+          if(OP_UNLIKELY(op_granpos_cmp(pcm_start,gp)>0)
+           ||OP_UNLIKELY(op_granpos_cmp(pcm_end,gp)<0)){
+            /*Don't let pcm_start get out of range!
+              That could happen with an invalid timestamp.*/
+            break;
+          }
+          /*Save the byte offset of the end of the page with this granule
+             position.*/
+          best=begin;
+          best_gp=pcm_start=gp;
+          OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start));
+          /*If we're more than a second away from our target, break out and
+             do another bisection.*/
+          if(diff>48000)break;
+          /*Otherwise, keep scanning forward (do NOT use begin+1).*/
+          bisect=begin;
+        }
+        else{
+          /*We found a page that ends after our target.*/
+          /*If we scanned the whole interval before we found it, we're done.*/
+          if(bisect<=begin+1)end=begin;
+          else{
+            end=bisect;
+            /*In later iterations, don't read past the first page we found.*/
+            boundary=next_boundary;
+            /*If we're not making much progress shrinking the interval size,
+               start forcing straight bisection to limit the worst case.*/
+            force_bisect=end-begin>d0*2;
+            /*Don't let pcm_end get out of range!
+              That could happen with an invalid timestamp.*/
+            if(OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)
+             &&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<=0)){
+              pcm_end=gp;
+            }
+            break;
+          }
+        }
+      }
+    }
+  }
+  /*Found our page.
+    Seek to the end of it and update prev_packet_gp.
+    Our caller will set cur_discard_count.
+    This is an easier case than op_raw_seek(), as we don't need to keep any
+     packets from the page we found.*/
+  /*Seek, if necessary.*/
+  if(best!=page_offset){
+    page_offset=-1;
+    ret=op_seek_helper(_of,best);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  OP_ASSERT(op_granpos_cmp(best_gp,pcm_start)>=0);
+  _of->cur_link=_li;
+  _of->ready_state=OP_STREAMSET;
+  _of->prev_packet_gp=best_gp;
+  ogg_stream_reset_serialno(&_of->os,serialno);
+  ret=op_fetch_and_process_page(_of,page_offset<0?NULL:&og,page_offset,1,0,1);
+  if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
+  /*Verify result.*/
+  if(OP_UNLIKELY(op_granpos_cmp(_of->prev_packet_gp,_target_gp)>0)){
+    return OP_EBADLINK;
+  }
+  return 0;
+}
+
+int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset){
+  const OggOpusLink *link;
+  ogg_int64_t        pcm_start;
+  ogg_int64_t        target_gp;
+  ogg_int64_t        prev_packet_gp;
+  ogg_int64_t        skip;
+  ogg_int64_t        diff;
+  int                op_count;
+  int                op_pos;
+  int                ret;
+  int                li;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK;
+  if(OP_UNLIKELY(_pcm_offset<0))return OP_EINVAL;
+  target_gp=op_get_granulepos(_of,_pcm_offset,&li);
+  if(OP_UNLIKELY(target_gp==-1))return OP_EINVAL;
+  link=_of->links+li;
+  pcm_start=link->pcm_start;
+  OP_ALWAYS_TRUE(!op_granpos_diff(&_pcm_offset,target_gp,pcm_start));
+#if !defined(OP_SMALL_FOOTPRINT)
+  /*For small (90 ms or less) forward seeks within the same link, just decode
+     forward.
+    This also optimizes the case of seeking to the current position.*/
+  if(li==_of->cur_link&&_of->ready_state>=OP_INITSET){
+    ogg_int64_t gp;
+    gp=_of->prev_packet_gp;
+    if(OP_LIKELY(gp!=-1)){
+      int nbuffered;
+      nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0);
+      OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered));
+      /*We do _not_ add cur_discard_count to gp.
+        Otherwise the total amount to discard could grow without bound, and it
+         would be better just to do a full seek.*/
+      if(OP_LIKELY(!op_granpos_diff(&diff,gp,pcm_start))){
+        ogg_int64_t discard_count;
+        discard_count=_pcm_offset-diff;
+        /*We use a threshold of 90 ms instead of 80, since 80 ms is the
+           _minimum_ we would have discarded after a full seek.
+          Assuming 20 ms frames (the default), we'd discard 90 ms on average.*/
+        if(discard_count>=0&&OP_UNLIKELY(discard_count<90*48)){
+          _of->cur_discard_count=(opus_int32)discard_count;
+          return 0;
+        }
+      }
+    }
+  }
+#endif
+  ret=op_pcm_seek_page(_of,target_gp,li);
+  if(OP_UNLIKELY(ret<0))return ret;
+  /*Now skip samples until we actually get to our target.*/
+  /*Figure out where we should skip to.*/
+  if(_pcm_offset<=link->head.pre_skip)skip=0;
+  else skip=OP_MAX(_pcm_offset-80*48,0);
+  OP_ASSERT(_pcm_offset-skip>=0);
+  OP_ASSERT(_pcm_offset-skip<OP_INT32_MAX-120*48);
+  /*Skip packets until we find one with samples past our skip target.*/
+  for(;;){
+    op_count=_of->op_count;
+    prev_packet_gp=_of->prev_packet_gp;
+    for(op_pos=_of->op_pos;op_pos<op_count;op_pos++){
+      ogg_int64_t cur_packet_gp;
+      cur_packet_gp=_of->op[op_pos].granulepos;
+      if(OP_LIKELY(!op_granpos_diff(&diff,cur_packet_gp,pcm_start))
+       &&diff>skip){
+        break;
+      }
+      prev_packet_gp=cur_packet_gp;
+    }
+    _of->prev_packet_gp=prev_packet_gp;
+    _of->op_pos=op_pos;
+    if(op_pos<op_count)break;
+    /*We skipped all the packets on this page.
+      Fetch another.*/
+    ret=op_fetch_and_process_page(_of,NULL,-1,1,0,1);
+    if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
+  }
+  OP_ALWAYS_TRUE(!op_granpos_diff(&diff,prev_packet_gp,pcm_start));
+  /*We skipped too far.
+    Either the timestamps were illegal or there was a hole in the data.*/
+  if(diff>skip)return OP_EBADLINK;
+  OP_ASSERT(_pcm_offset-diff<OP_INT32_MAX);
+  /*TODO: If there are further holes/illegal timestamps, we still won't decode
+     to the correct sample.
+    However, at least op_pcm_tell() will report the correct value immediately
+     after returning.*/
+  _of->cur_discard_count=(opus_int32)(_pcm_offset-diff);
+  return 0;
+}
+
+opus_int64 op_raw_tell(const OggOpusFile *_of){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  return _of->offset;
+}
+
+/*Convert a granule position from a given link to a PCM offset relative to the
+   start of the whole stream.
+  For unseekable sources, this gets reset to 0 at the beginning of each link.*/
+static ogg_int64_t op_get_pcm_offset(const OggOpusFile *_of,
+ ogg_int64_t _gp,int _li){
+  const OggOpusLink *links;
+  ogg_int64_t        pcm_offset;
+  ogg_int64_t        delta;
+  int                li;
+  links=_of->links;
+  pcm_offset=0;
+  OP_ASSERT(_li<_of->nlinks);
+  for(li=0;li<_li;li++){
+    OP_ALWAYS_TRUE(!op_granpos_diff(&delta,
+     links[li].pcm_end,links[li].pcm_start));
+    delta-=links[li].head.pre_skip;
+    pcm_offset+=delta;
+  }
+  OP_ASSERT(_li>=0);
+  if(_of->seekable&&OP_UNLIKELY(op_granpos_cmp(_gp,links[_li].pcm_end)>0)){
+    _gp=links[_li].pcm_end;
+  }
+  if(OP_LIKELY(op_granpos_cmp(_gp,links[_li].pcm_start)>0)){
+    if(OP_UNLIKELY(op_granpos_diff(&delta,_gp,links[_li].pcm_start)<0)){
+      /*This means an unseekable stream claimed to have a page from more than
+         2 billion days after we joined.*/
+      OP_ASSERT(!_of->seekable);
+      return OP_INT64_MAX;
+    }
+    if(delta<links[_li].head.pre_skip)delta=0;
+    else delta-=links[_li].head.pre_skip;
+    /*In the seekable case, _gp was limited by pcm_end.
+      In the unseekable case, pcm_offset should be 0.*/
+    OP_ASSERT(pcm_offset<=OP_INT64_MAX-delta);
+    pcm_offset+=delta;
+  }
+  return pcm_offset;
+}
+
+ogg_int64_t op_pcm_tell(const OggOpusFile *_of){
+  ogg_int64_t gp;
+  int         nbuffered;
+  int         li;
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  gp=_of->prev_packet_gp;
+  if(gp==-1)return 0;
+  nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0);
+  OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered));
+  li=_of->seekable?_of->cur_link:0;
+  if(op_granpos_add(&gp,gp,_of->cur_discard_count)<0){
+    gp=_of->links[li].pcm_end;
+  }
+  return op_get_pcm_offset(_of,gp,li);
+}
+
+void op_set_decode_callback(OggOpusFile *_of,
+ op_decode_cb_func _decode_cb,void *_ctx){
+  _of->decode_cb=_decode_cb;
+  _of->decode_cb_ctx=_ctx;
+}
+
+int op_set_gain_offset(OggOpusFile *_of,
+ int _gain_type,opus_int32 _gain_offset_q8){
+  if(_gain_type!=OP_HEADER_GAIN&&_gain_type!=OP_TRACK_GAIN
+   &&_gain_type!=OP_ABSOLUTE_GAIN){
+    return OP_EINVAL;
+  }
+  _of->gain_type=_gain_type;
+  /*The sum of header gain and track gain lies in the range [-65536,65534].
+    These bounds allow the offset to set the final value to anywhere in the
+     range [-32768,32767], which is what we'll clamp it to before applying.*/
+  _of->gain_offset_q8=OP_CLAMP(-98302,_gain_offset_q8,98303);
+  op_update_gain(_of);
+  return 0;
+}
+
+void op_set_dither_enabled(OggOpusFile *_of,int _enabled){
+#if !defined(OPUS_FIXED_POINT)
+  _of->dither_disabled=!_enabled;
+  if(!_enabled)_of->dither_mute=65;
+#endif
+}
+
+/*Allocate the decoder scratch buffer.
+  This is done lazily, since if the user provides large enough buffers, we'll
+   never need it.*/
+static int op_init_buffer(OggOpusFile *_of){
+  int nchannels_max;
+  if(_of->seekable){
+    const OggOpusLink *links;
+    int                nlinks;
+    int                li;
+    links=_of->links;
+    nlinks=_of->nlinks;
+    nchannels_max=1;
+    for(li=0;li<nlinks;li++){
+      nchannels_max=OP_MAX(nchannels_max,links[li].head.channel_count);
+    }
+  }
+  else nchannels_max=OP_NCHANNELS_MAX;
+  _of->od_buffer=(op_sample *)_ogg_malloc(
+   sizeof(*_of->od_buffer)*nchannels_max*120*48);
+  if(_of->od_buffer==NULL)return OP_EFAULT;
+  return 0;
+}
+
+/*Decode a single packet into the target buffer.*/
+static int op_decode(OggOpusFile *_of,op_sample *_pcm,
+ const ogg_packet *_op,int _nsamples,int _nchannels){
+  int ret;
+  /*First we try using the application-provided decode callback.*/
+  if(_of->decode_cb!=NULL){
+#if defined(OPUS_FIXED_POINT)
+    ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op,
+     _nsamples,_nchannels,OP_DEC_FORMAT_SHORT,_of->cur_link);
+#else
+    ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op,
+     _nsamples,_nchannels,OP_DEC_FORMAT_FLOAT,_of->cur_link);
+#endif
+  }
+  else ret=OP_DEC_USE_DEFAULT;
+  /*If the application didn't want to handle decoding, do it ourselves.*/
+  if(ret==OP_DEC_USE_DEFAULT){
+#if defined(OPUS_FIXED_POINT)
+    ret=opus_multistream_decode(_of->od,
+     _op->packet,_op->bytes,_pcm,_nsamples,0);
+#else
+    ret=opus_multistream_decode_float(_of->od,
+     _op->packet,_op->bytes,_pcm,_nsamples,0);
+#endif
+    OP_ASSERT(ret<0||ret==_nsamples);
+  }
+  /*If the application returned a positive value other than 0 or
+     OP_DEC_USE_DEFAULT, fail.*/
+  else if(OP_UNLIKELY(ret>0))return OP_EBADPACKET;
+  if(OP_UNLIKELY(ret<0))return OP_EBADPACKET;
+  return ret;
+}
+
+/*Read more samples from the stream, using the same API as op_read() or
+   op_read_float().*/
+static int op_read_native(OggOpusFile *_of,
+ op_sample *_pcm,int _buf_size,int *_li){
+  if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL;
+  for(;;){
+    int ret;
+    if(OP_LIKELY(_of->ready_state>=OP_INITSET)){
+      int nchannels;
+      int od_buffer_pos;
+      int nsamples;
+      int op_pos;
+      nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count;
+      od_buffer_pos=_of->od_buffer_pos;
+      nsamples=_of->od_buffer_size-od_buffer_pos;
+      /*If we have buffered samples, return them.*/
+      if(nsamples>0){
+        if(nsamples*nchannels>_buf_size)nsamples=_buf_size/nchannels;
+        memcpy(_pcm,_of->od_buffer+nchannels*od_buffer_pos,
+         sizeof(*_pcm)*nchannels*nsamples);
+        od_buffer_pos+=nsamples;
+        _of->od_buffer_pos=od_buffer_pos;
+        if(_li!=NULL)*_li=_of->cur_link;
+        return nsamples;
+      }
+      /*If we have buffered packets, decode one.*/
+      op_pos=_of->op_pos;
+      if(OP_LIKELY(op_pos<_of->op_count)){
+        const ogg_packet *pop;
+        ogg_int64_t       diff;
+        opus_int32        cur_discard_count;
+        int               duration;
+        int               trimmed_duration;
+        pop=_of->op+op_pos++;
+        _of->op_pos=op_pos;
+        cur_discard_count=_of->cur_discard_count;
+        duration=op_get_packet_duration(pop->packet,pop->bytes);
+        /*We don't buffer packets with an invalid TOC sequence.*/
+        OP_ASSERT(duration>0);
+        trimmed_duration=duration;
+        /*Perform end-trimming.*/
+        if(OP_UNLIKELY(pop->e_o_s)){
+          if(OP_UNLIKELY(op_granpos_cmp(pop->granulepos,
+           _of->prev_packet_gp)<=0)){
+            trimmed_duration=0;
+          }
+          else if(OP_LIKELY(!op_granpos_diff(&diff,
+           pop->granulepos,_of->prev_packet_gp))){
+            trimmed_duration=(int)OP_MIN(diff,trimmed_duration);
+          }
+        }
+        _of->prev_packet_gp=pop->granulepos;
+        if(OP_UNLIKELY(duration*nchannels>_buf_size)){
+          op_sample *buf;
+          /*If the user's buffer is too small, decode into a scratch buffer.*/
+          buf=_of->od_buffer;
+          if(OP_UNLIKELY(buf==NULL)){
+            ret=op_init_buffer(_of);
+            if(OP_UNLIKELY(ret<0))return ret;
+            buf=_of->od_buffer;
+          }
+          ret=op_decode(_of,buf,pop,duration,nchannels);
+          if(OP_UNLIKELY(ret<0))return ret;
+          /*Perform pre-skip/pre-roll.*/
+          od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count);
+          cur_discard_count-=od_buffer_pos;
+          _of->cur_discard_count=cur_discard_count;
+          _of->od_buffer_pos=od_buffer_pos;
+          _of->od_buffer_size=trimmed_duration;
+          /*Update bitrate tracking based on the actual samples we used from
+             what was decoded.*/
+          _of->bytes_tracked+=pop->bytes;
+          _of->samples_tracked+=trimmed_duration-od_buffer_pos;
+        }
+        else{
+          /*Otherwise decode directly into the user's buffer.*/
+          ret=op_decode(_of,_pcm,pop,duration,nchannels);
+          if(OP_UNLIKELY(ret<0))return ret;
+          if(OP_LIKELY(trimmed_duration>0)){
+            /*Perform pre-skip/pre-roll.*/
+            od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count);
+            cur_discard_count-=od_buffer_pos;
+            _of->cur_discard_count=cur_discard_count;
+            trimmed_duration-=od_buffer_pos;
+            if(OP_LIKELY(trimmed_duration>0)
+             &&OP_UNLIKELY(od_buffer_pos>0)){
+              memmove(_pcm,_pcm+od_buffer_pos*nchannels,
+               sizeof(*_pcm)*trimmed_duration*nchannels);
+            }
+            /*Update bitrate tracking based on the actual samples we used from
+               what was decoded.*/
+            _of->bytes_tracked+=pop->bytes;
+            _of->samples_tracked+=trimmed_duration;
+            if(OP_LIKELY(trimmed_duration>0)){
+              if(_li!=NULL)*_li=_of->cur_link;
+              return trimmed_duration;
+            }
+          }
+        }
+        /*Don't grab another page yet.
+          This one might have more packets, or might have buffered data now.*/
+        continue;
+      }
+    }
+    /*Suck in another page.*/
+    ret=op_fetch_and_process_page(_of,NULL,-1,1,1,0);
+    if(OP_UNLIKELY(ret==OP_EOF)){
+      if(_li!=NULL)*_li=_of->cur_link;
+      return 0;
+    }
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+}
+
+/*A generic filter to apply to the decoded audio data.
+  _src is non-const because we will destructively modify the contents of the
+   source buffer that we consume in some cases.*/
+typedef int (*op_read_filter_func)(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels);
+
+/*Decode some samples and then apply a custom filter to them.
+  This is used to convert to different output formats.*/
+static int op_filter_read_native(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_read_filter_func _filter,int *_li){
+  int ret;
+  /*Ensure we have some decoded samples in our buffer.*/
+  ret=op_read_native(_of,NULL,0,_li);
+  /*Now apply the filter to them.*/
+  if(OP_LIKELY(ret>=0)&&OP_LIKELY(_of->ready_state>=OP_INITSET)){
+    int od_buffer_pos;
+    od_buffer_pos=_of->od_buffer_pos;
+    ret=_of->od_buffer_size-od_buffer_pos;
+    if(OP_LIKELY(ret>0)){
+      int nchannels;
+      nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count;
+      ret=(*_filter)(_of,_dst,_dst_sz,
+       _of->od_buffer+nchannels*od_buffer_pos,ret,nchannels);
+      OP_ASSERT(ret>=0);
+      OP_ASSERT(ret<=_of->od_buffer_size-od_buffer_pos);
+      od_buffer_pos+=ret;
+      _of->od_buffer_pos=od_buffer_pos;
+    }
+  }
+  return ret;
+}
+
+#if !defined(OPUS_FIXED_POINT)||!defined(OP_DISABLE_FLOAT_API)
+
+/*Matrices for downmixing from the supported channel counts to stereo.
+  The matrices with 5 or more channels are normalized to a total volume of 2.0,
+   since most mixes sound too quiet if normalized to 1.0 (as there is generally
+   little volume in the side/rear channels).*/
+static const float OP_STEREO_DOWNMIX[OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={
+  /*3.0*/
+  {
+    {0.5858F,0.0F},{0.4142F,0.4142F},{0.0F,0.5858F}
+  },
+  /*quadrophonic*/
+  {
+    {0.4226F,0.0F},{0.0F,0.4226F},{0.366F,0.2114F},{0.2114F,0.336F}
+  },
+  /*5.0*/
+  {
+    {0.651F,0.0F},{0.46F,0.46F},{0.0F,0.651F},{0.5636F,0.3254F},
+    {0.3254F,0.5636F}
+  },
+  /*5.1*/
+  {
+    {0.529F,0.0F},{0.3741F,0.3741F},{0.0F,0.529F},{0.4582F,0.2645F},
+    {0.2645F,0.4582F},{0.3741F,0.3741F}
+  },
+  /*6.1*/
+  {
+    {0.4553F,0.0F},{0.322F,0.322F},{0.0F,0.4553F},{0.3943F,0.2277F},
+    {0.2277F,0.3943F},{0.2788F,0.2788F},{0.322F,0.322F}
+  },
+  /*7.1*/
+  {
+    {0.3886F,0.0F},{0.2748F,0.2748F},{0.0F,0.3886F},{0.3366F,0.1943F},
+    {0.1943F,0.3366F},{0.3366F,0.1943F},{0.1943F,0.3366F},{0.2748F,0.2748F}
+  }
+};
+
+#endif
+
+#if defined(OPUS_FIXED_POINT)
+
+/*Matrices for downmixing from the supported channel counts to stereo.
+  The matrices with 5 or more channels are normalized to a total volume of 2.0,
+   since most mixes sound too quiet if normalized to 1.0 (as there is generally
+   little volume in the side/rear channels).
+  Hence we keep the coefficients in Q14, so the downmix values won't overflow a
+   32-bit number.*/
+static const opus_int16 OP_STEREO_DOWNMIX_Q14
+ [OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={
+  /*3.0*/
+  {
+    {9598,0},{6786,6786},{0,9598}
+  },
+  /*quadrophonic*/
+  {
+    {6924,0},{0,6924},{5996,3464},{3464,5996}
+  },
+  /*5.0*/
+  {
+    {10666,0},{7537,7537},{0,10666},{9234,5331},{5331,9234}
+  },
+  /*5.1*/
+  {
+    {8668,0},{6129,6129},{0,8668},{7507,4335},{4335,7507},{6129,6129}
+  },
+  /*6.1*/
+  {
+    {7459,0},{5275,5275},{0,7459},{6460,3731},{3731,6460},{4568,4568},
+    {5275,5275}
+  },
+  /*7.1*/
+  {
+    {6368,0},{4502,4502},{0,6368},{5515,3183},{3183,5515},{5515,3183},
+    {3183,5515},{4502,4502}
+  }
+};
+
+int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){
+  return op_read_native(_of,_pcm,_buf_size,_li);
+}
+
+static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  (void)_of;
+  _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src));
+  else{
+    opus_int16 *dst;
+    int         i;
+    dst=(opus_int16 *)_dst;
+    if(_nchannels==1){
+      for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i];
+    }
+    else{
+      for(i=0;i<_nsamples;i++){
+        opus_int32 l;
+        opus_int32 r;
+        int        ci;
+        l=r=0;
+        for(ci=0;ci<_nchannels;ci++){
+          opus_int32 s;
+          s=_src[_nchannels*i+ci];
+          l+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][0]*s;
+          r+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][1]*s;
+        }
+        /*TODO: For 5 or more channels, we should do soft clipping here.*/
+        dst[2*i+0]=(opus_int16)OP_CLAMP(-32768,l+8192>>14,32767);
+        dst[2*i+1]=(opus_int16)OP_CLAMP(-32768,r+8192>>14,32767);
+      }
+    }
+  }
+  return _nsamples;
+}
+
+int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){
+  return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL);
+}
+
+# if !defined(OP_DISABLE_FLOAT_API)
+
+static int op_short2float_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  float *dst;
+  int    i;
+  (void)_of;
+  dst=(float *)_dst;
+  if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels;
+  _dst_sz=_nsamples*_nchannels;
+  for(i=0;i<_dst_sz;i++)dst[i]=(1.0F/32768)*_src[i];
+  return _nsamples;
+}
+
+int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){
+  return op_filter_read_native(_of,_pcm,_buf_size,op_short2float_filter,_li);
+}
+
+static int op_short2float_stereo_filter(OggOpusFile *_of,
+ void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){
+  float *dst;
+  int    i;
+  dst=(float *)_dst;
+  _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==1){
+    _nsamples=op_short2float_filter(_of,dst,_nsamples,_src,_nsamples,1);
+    for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i];
+  }
+  else if(_nchannels<5){
+    /*For 3 or 4 channels, we can downmix in fixed point without risk of
+       clipping.*/
+    if(_nchannels>2){
+      _nsamples=op_stereo_filter(_of,_src,_nsamples*2,
+       _src,_nsamples,_nchannels);
+    }
+    return op_short2float_filter(_of,dst,_dst_sz,_src,_nsamples,2);
+  }
+  else{
+    /*For 5 or more channels, we convert to floats and then downmix (so that we
+       don't risk clipping).*/
+    for(i=0;i<_nsamples;i++){
+      float l;
+      float r;
+      int   ci;
+      l=r=0;
+      for(ci=0;ci<_nchannels;ci++){
+        float s;
+        s=(1.0F/32768)*_src[_nchannels*i+ci];
+        l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*s;
+        r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*s;
+      }
+      dst[2*i+0]=l;
+      dst[2*i+1]=r;
+    }
+  }
+  return _nsamples;
+}
+
+int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){
+  return op_filter_read_native(_of,_pcm,_buf_size,
+   op_short2float_stereo_filter,NULL);
+}
+
+# endif
+
+#else
+
+# if defined(OP_HAVE_LRINTF)
+#  include <math.h>
+#  define op_float2int(_x) (lrintf(_x))
+# else
+#  define op_float2int(_x) ((int)((_x)+((_x)<0?-0.5F:0.5F)))
+# endif
+
+/*The dithering code here is adapted from opusdec, part of opus-tools.
+  It was originally written by Greg Maxwell.*/
+
+static opus_uint32 op_rand(opus_uint32 _seed){
+  return _seed*96314165+907633515&0xFFFFFFFFU;
+}
+
+/*This implements 16-bit quantization with full triangular dither and IIR noise
+   shaping.
+  The noise shaping filters were designed by Sebastian Gesemann, and are based
+   on the LAME ATH curves with flattening to limit their peak gain to 20 dB.
+  Everyone else's noise shaping filters are mildly crazy.
+  The 48 kHz version of this filter is just a warped version of the 44.1 kHz
+   filter and probably could be improved by shifting the HF shelf up in
+   frequency a little bit, since 48 kHz has a bit more room and being more
+   conservative against bat-ears is probably more important than more noise
+   suppression.
+  This process can increase the peak level of the signal (in theory by the peak
+   error of 1.5 +20 dB, though that is unobservably rare).
+  To avoid clipping, the signal is attenuated by a couple thousandths of a dB.
+  Initially, the approach taken here was to only attenuate by the 99.9th
+   percentile, making clipping rare but not impossible (like SoX), but the
+   limited gain of the filter means that the worst case was only two
+   thousandths of a dB more, so this just uses the worst case.
+  The attenuation is probably also helpful to prevent clipping in the DAC
+   reconstruction filters or downstream resampling, in any case.*/
+
+# define OP_GAIN (32753.0F)
+
+# define OP_PRNG_GAIN (1.0F/0xFFFFFFFF)
+
+/*48 kHz noise shaping filter, sd=2.34.*/
+
+static const float OP_FCOEF_B[4]={
+  2.2374F,-0.7339F,-0.1251F,-0.6033F
+};
+
+static const float OP_FCOEF_A[4]={
+  0.9030F,0.0116F,-0.5853F,-0.2571F
+};
+
+static int op_float2short_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ float *_src,int _nsamples,int _nchannels){
+  opus_int16 *dst;
+  int         ci;
+  int         i;
+  dst=(opus_int16 *)_dst;
+  if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels;
+# if defined(OP_SOFT_CLIP)
+  if(_of->state_channel_count!=_nchannels){
+    for(ci=0;ci<_nchannels;ci++)_of->clip_state[ci]=0;
+  }
+  opus_pcm_soft_clip(_src,_nsamples,_nchannels,_of->clip_state);
+# endif
+  if(_of->dither_disabled){
+    for(i=0;i<_nchannels*_nsamples;i++){
+      dst[i]=op_float2int(OP_CLAMP(-32768,32768.0F*_src[i],32767));
+    }
+  }
+  else{
+    opus_uint32 seed;
+    int         mute;
+    seed=_of->dither_seed;
+    mute=_of->dither_mute;
+    if(_of->state_channel_count!=_nchannels)mute=65;
+    /*In order to avoid replacing digital silence with quiet dither noise, we
+       mute if the output has been silent for a while.*/
+    if(mute>64)memset(_of->dither_a,0,sizeof(*_of->dither_a)*4*_nchannels);
+    for(i=0;i<_nsamples;i++){
+      int silent;
+      silent=1;
+      for(ci=0;ci<_nchannels;ci++){
+        float r;
+        float s;
+        float err;
+        int   si;
+        int   j;
+        s=_src[_nchannels*i+ci];
+        silent&=s==0;
+        s*=OP_GAIN;
+        err=0;
+        for(j=0;j<4;j++){
+          err+=OP_FCOEF_B[j]*_of->dither_b[ci*4+j]
+           -OP_FCOEF_A[j]*_of->dither_a[ci*4+j];
+        }
+        for(j=3;j-->0;)_of->dither_a[ci*4+j+1]=_of->dither_a[ci*4+j];
+        for(j=3;j-->0;)_of->dither_b[ci*4+j+1]=_of->dither_b[ci*4+j];
+        _of->dither_a[ci*4]=err;
+        s-=err;
+        if(mute>16)r=0;
+        else{
+          seed=op_rand(seed);
+          r=seed*OP_PRNG_GAIN;
+          seed=op_rand(seed);
+          r-=seed*OP_PRNG_GAIN;
+        }
+        /*Clamp in float out of paranoia that the input will be > 96 dBFS and
+           wrap if the integer is clamped.*/
+        si=op_float2int(OP_CLAMP(-32768,s+r,32767));
+        dst[_nchannels*i+ci]=(opus_int16)si;
+        /*Including clipping in the noise shaping is generally disastrous: the
+           futile effort to restore the clipped energy results in more clipping.
+          However, small amounts---at the level which could normally be created
+           by dither and rounding---are harmless and can even reduce clipping
+           somewhat due to the clipping sometimes reducing the dither + rounding
+           error.*/
+        _of->dither_b[ci*4]=mute>16?0:OP_CLAMP(-1.5F,si-s,1.5F);
+      }
+      mute++;
+      if(!silent)mute=0;
+    }
+    _of->dither_mute=OP_MIN(mute,65);
+    _of->dither_seed=seed;
+  }
+  _of->state_channel_count=_nchannels;
+  return _nsamples;
+}
+
+int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){
+  return op_filter_read_native(_of,_pcm,_buf_size,op_float2short_filter,_li);
+}
+
+int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){
+  _of->state_channel_count=0;
+  return op_read_native(_of,_pcm,_buf_size,_li);
+}
+
+static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz,
+ op_sample *_src,int _nsamples,int _nchannels){
+  (void)_of;
+  _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+  if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src));
+  else{
+    float *dst;
+    int    i;
+    dst=(float *)_dst;
+    if(_nchannels==1){
+      for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i];
+    }
+    else{
+      for(i=0;i<_nsamples;i++){
+        float l;
+        float r;
+        int   ci;
+        l=r=0;
+        for(ci=0;ci<_nchannels;ci++){
+          l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*_src[_nchannels*i+ci];
+          r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*_src[_nchannels*i+ci];
+        }
+        dst[2*i+0]=l;
+        dst[2*i+1]=r;
+      }
+    }
+  }
+  return _nsamples;
+}
+
+static int op_float2short_stereo_filter(OggOpusFile *_of,
+ void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){
+  opus_int16 *dst;
+  dst=(opus_int16 *)_dst;
+  if(_nchannels==1){
+    int i;
+    _nsamples=op_float2short_filter(_of,dst,_dst_sz>>1,_src,_nsamples,1);
+    for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i];
+  }
+  else{
+    if(_nchannels>2){
+      _nsamples=OP_MIN(_nsamples,_dst_sz>>1);
+      _nsamples=op_stereo_filter(_of,_src,_nsamples*2,
+       _src,_nsamples,_nchannels);
+    }
+    _nsamples=op_float2short_filter(_of,dst,_dst_sz,_src,_nsamples,2);
+  }
+  return _nsamples;
+}
+
+int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){
+  return op_filter_read_native(_of,_pcm,_buf_size,
+   op_float2short_stereo_filter,NULL);
+}
+
+int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){
+  _of->state_channel_count=0;
+  return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL);
+}
+
+#endif
diff --git a/drivers/opus/opusfile.h b/drivers/opus/opusfile.h
new file mode 100644
index 0000000000..91d06aa9ba
--- /dev/null
+++ b/drivers/opus/opusfile.h
@@ -0,0 +1,2102 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012           *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.h 17182 2010-04-29 03:48:32Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_opusfile_h)
+# define _opusfile_h (1)
+
+/**\mainpage
+   \section Introduction
+
+   This is the documentation for the <tt>libopusfile</tt> C API.
+
+   The <tt>libopusfile</tt> package provides a convenient high-level API for
+    decoding and basic manipulation of all Ogg Opus audio streams.
+   <tt>libopusfile</tt> is implemented as a layer on top of Xiph.Org's
+    reference
+    <tt><a href="https://www.xiph.org/ogg/doc/libogg/reference.html">libogg</a></tt>
+    and
+    <tt><a href="https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/index.html">libopus</a></tt>
+    libraries.
+
+   <tt>libopusfile</tt> provides several sets of built-in routines for
+    file/stream access, and may also use custom stream I/O routines provided by
+    the embedded environment.
+   There are built-in I/O routines provided for ANSI-compliant
+    <code>stdio</code> (<code>FILE *</code>), memory buffers, and URLs
+    (including <file:> URLs, plus optionally <http:> and <https:> URLs).
+
+   \section Organization
+
+   The main API is divided into several sections:
+   - \ref stream_open_close
+   - \ref stream_info
+   - \ref stream_decoding
+   - \ref stream_seeking
+
+   Several additional sections are not tied to the main API.
+   - \ref stream_callbacks
+   - \ref header_info
+   - \ref error_codes
+
+   \section Overview
+
+   The <tt>libopusfile</tt> API always decodes files to 48&nbsp;kHz.
+   The original sample rate is not preserved by the lossy compression, though
+    it is stored in the header to allow you to resample to it after decoding
+    (the <tt>libopusfile</tt> API does not currently provide a resampler,
+    but the
+    <a href="http://www.speex.org/docs/manual/speex-manual/node7.html#SECTION00760000000000000000">the
+    Speex resampler</a> is a good choice if you need one).
+   In general, if you are playing back the audio, you should leave it at
+    48&nbsp;kHz, provided your audio hardware supports it.
+   When decoding to a file, it may be worth resampling back to the original
+    sample rate, so as not to surprise users who might not expect the sample
+    rate to change after encoding to Opus and decoding.
+
+   Opus files can contain anywhere from 1 to 255 channels of audio.
+   The channel mappings for up to 8 channels are the same as the
+    <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+    mappings</a>.
+   A special stereo API can convert everything to 2 channels, making it simple
+    to support multichannel files in an application which only has stereo
+    output.
+   Although the <tt>libopusfile</tt> ABI provides support for the theoretical
+    maximum number of channels, the current implementation does not support
+    files with more than 8 channels, as they do not have well-defined channel
+    mappings.
+
+   Like all Ogg files, Opus files may be "chained".
+   That is, multiple Opus files may be combined into a single, longer file just
+    by concatenating the original files.
+   This is commonly done in internet radio streaming, as it allows the title
+    and artist to be updated each time the song changes, since each link in the
+    chain includes its own set of metadata.
+
+   <tt>libopusfile</tt> fully supports chained files.
+   It will decode the first Opus stream found in each link of a chained file
+    (ignoring any other streams that might be concurrently multiplexed with it,
+    such as a video stream).
+
+   The channel count can also change between links.
+   If your application is not prepared to deal with this, it can use the stereo
+    API to ensure the audio from all links will always get decoded into a
+    common format.
+   Since <tt>libopusfile</tt> always decodes to 48&nbsp;kHz, you do not have to
+    worry about the sample rate changing between links (as was possible with
+    Vorbis).
+   This makes application support for chained files with <tt>libopusfile</tt>
+    very easy.*/
+
+# if defined(__cplusplus)
+extern "C" {
+# endif
+
+# include <stdarg.h>
+# include <stdio.h>
+# include <ogg/ogg.h>
+# include <opus/opus_multistream.h>
+
+/**@cond PRIVATE*/
+
+/*Enable special features for gcc and gcc-compatible compilers.*/
+# if !defined(OP_GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define OP_GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define OP_GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+# if OP_GNUC_PREREQ(4,0)
+#  pragma GCC visibility push(default)
+# endif
+
+typedef struct OpusHead          OpusHead;
+typedef struct OpusTags          OpusTags;
+typedef struct OpusPictureTag    OpusPictureTag;
+typedef struct OpusServerInfo    OpusServerInfo;
+typedef struct OpusFileCallbacks OpusFileCallbacks;
+typedef struct OggOpusFile       OggOpusFile;
+
+/*Warning attributes for libopusfile functions.*/
+# if OP_GNUC_PREREQ(3,4)
+#  define OP_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+# else
+#  define OP_WARN_UNUSED_RESULT
+# endif
+# if OP_GNUC_PREREQ(3,4)
+#  define OP_ARG_NONNULL(_x) __attribute__((__nonnull__(_x)))
+# else
+#  define OP_ARG_NONNULL(_x)
+# endif
+
+/**@endcond*/
+
+/**\defgroup error_codes Error Codes*/
+/*@{*/
+/**\name List of possible error codes
+   Many of the functions in this library return a negative error code when a
+    function fails.
+   This list provides a brief explanation of the common errors.
+   See each individual function for more details on what a specific error code
+    means in that context.*/
+/*@{*/
+
+/**A request did not succeed.*/
+#define OP_FALSE         (-1)
+/*Currently not used externally.*/
+#define OP_EOF           (-2)
+/**There was a hole in the page sequence numbers (e.g., a page was corrupt or
+    missing).*/
+#define OP_HOLE          (-3)
+/**An underlying read, seek, or tell operation failed when it should have
+    succeeded.*/
+#define OP_EREAD         (-128)
+/**A <code>NULL</code> pointer was passed where one was unexpected, or an
+    internal memory allocation failed, or an internal library error was
+    encountered.*/
+#define OP_EFAULT        (-129)
+/**The stream used a feature that is not implemented, such as an unsupported
+    channel family.*/
+#define OP_EIMPL         (-130)
+/**One or more parameters to a function were invalid.*/
+#define OP_EINVAL        (-131)
+/**A purported Ogg Opus stream did not begin with an Ogg page, a purported
+    header packet did not start with one of the required strings, "OpusHead" or
+    "OpusTags", or a link in a chained file was encountered that did not
+    contain any logical Opus streams.*/
+#define OP_ENOTFORMAT    (-132)
+/**A required header packet was not properly formatted, contained illegal
+    values, or was missing altogether.*/
+#define OP_EBADHEADER    (-133)
+/**The ID header contained an unrecognized version number.*/
+#define OP_EVERSION      (-134)
+/*Currently not used at all.*/
+#define OP_ENOTAUDIO     (-135)
+/**An audio packet failed to decode properly.
+   This is usually caused by a multistream Ogg packet where the durations of
+    the individual Opus packets contained in it are not all the same.*/
+#define OP_EBADPACKET    (-136)
+/**We failed to find data we had seen before, or the bitstream structure was
+    sufficiently malformed that seeking to the target destination was
+    impossible.*/
+#define OP_EBADLINK      (-137)
+/**An operation that requires seeking was requested on an unseekable stream.*/
+#define OP_ENOSEEK       (-138)
+/**The first or last granule position of a link failed basic validity checks.*/
+#define OP_EBADTIMESTAMP (-139)
+
+/*@}*/
+/*@}*/
+
+/**\defgroup header_info Header Information*/
+/*@{*/
+
+/**The maximum number of channels in an Ogg Opus stream.*/
+#define OPUS_CHANNEL_COUNT_MAX (255)
+
+/**Ogg Opus bitstream information.
+   This contains the basic playback parameters for a stream, and corresponds to
+    the initial ID header packet of an Ogg Opus stream.*/
+struct OpusHead{
+  /**The Ogg Opus format version, in the range 0...255.
+     The top 4 bits represent a "major" version, and the bottom four bits
+      represent backwards-compatible "minor" revisions.
+     The current specification describes version 1.
+     This library will recognize versions up through 15 as backwards compatible
+      with the current specification.
+     An earlier draft of the specification described a version 0, but the only
+      difference between version 1 and version 0 is that version 0 did
+      not specify the semantics for handling the version field.*/
+  int           version;
+  /**The number of channels, in the range 1...255.*/
+  int           channel_count;
+  /**The number of samples that should be discarded from the beginning of the
+      stream.*/
+  unsigned      pre_skip;
+  /**The sampling rate of the original input.
+     All Opus audio is coded at 48 kHz, and should also be decoded at 48 kHz
+      for playback (unless the target hardware does not support this sampling
+      rate).
+     However, this field may be used to resample the audio back to the original
+      sampling rate, for example, when saving the output to a file.*/
+  opus_uint32   input_sample_rate;
+  /**The gain to apply to the decoded output, in dB, as a Q8 value in the range
+      -32768...32767.
+     The <tt>libopusfile</tt> API will automatically apply this gain to the
+      decoded output before returning it, scaling it by
+      <code>pow(10,output_gain/(20.0*256))</code>.*/
+  int           output_gain;
+  /**The channel mapping family, in the range 0...255.
+     Channel mapping family 0 covers mono or stereo in a single stream.
+     Channel mapping family 1 covers 1 to 8 channels in one or more streams,
+      using the Vorbis speaker assignments.
+     Channel mapping family 255 covers 1 to 255 channels in one or more
+      streams, but without any defined speaker assignment.*/
+  int           mapping_family;
+  /**The number of Opus streams in each Ogg packet, in the range 1...255.*/
+  int           stream_count;
+  /**The number of coupled Opus streams in each Ogg packet, in the range
+      0...127.
+     This must satisfy <code>0 <= coupled_count <= stream_count</code> and
+      <code>coupled_count + stream_count <= 255</code>.
+     The coupled streams appear first, before all uncoupled streams, in an Ogg
+      Opus packet.*/
+  int           coupled_count;
+  /**The mapping from coded stream channels to output channels.
+     Let <code>index=mapping[k]</code> be the value for channel <code>k</code>.
+     If <code>index<2*coupled_count</code>, then it refers to the left channel
+      from stream <code>(index/2)</code> if even, and the right channel from
+      stream <code>(index/2)</code> if odd.
+     Otherwise, it refers to the output of the uncoupled stream
+      <code>(index-coupled_count)</code>.*/
+  unsigned char mapping[OPUS_CHANNEL_COUNT_MAX];
+};
+
+/**The metadata from an Ogg Opus stream.
+
+   This structure holds the in-stream metadata corresponding to the 'comment'
+    header packet of an Ogg Opus stream.
+   The comment header is meant to be used much like someone jotting a quick
+    note on the label of a CD.
+   It should be a short, to the point text note that can be more than a couple
+    words, but not more than a short paragraph.
+
+   The metadata is stored as a series of (tag, value) pairs, in length-encoded
+    string vectors, using the same format as Vorbis (without the final "framing
+    bit"), Theora, and Speex, except for the packet header.
+   The first occurrence of the '=' character delimits the tag and value.
+   A particular tag may occur more than once, and order is significant.
+   The character set encoding for the strings is always UTF-8, but the tag
+    names are limited to ASCII, and treated as case-insensitive.
+   See <a href="http://www.xiph.org/vorbis/doc/v-comment.html">the Vorbis
+    comment header specification</a> for details.
+
+   In filling in this structure, <tt>libopusfile</tt> will null-terminate the
+    #user_comments strings for safety.
+   However, the bitstream format itself treats them as 8-bit clean vectors,
+    possibly containing NUL characters, so the #comment_lengths array should be
+    treated as their authoritative length.
+
+   This structure is binary and source-compatible with a
+    <code>vorbis_comment</code>, and pointers to it may be freely cast to
+    <code>vorbis_comment</code> pointers, and vice versa.
+   It is provided as a separate type to avoid introducing a compile-time
+    dependency on the libvorbis headers.*/
+struct OpusTags{
+  /**The array of comment string vectors.*/
+  char **user_comments;
+  /**An array of the corresponding length of each vector, in bytes.*/
+  int   *comment_lengths;
+  /**The total number of comment streams.*/
+  int    comments;
+  /**The null-terminated vendor string.
+     This identifies the software used to encode the stream.*/
+  char  *vendor;
+};
+
+/**\name Picture tag image formats*/
+/*@{*/
+
+/**The MIME type was not recognized, or the image data did not match the
+    declared MIME type.*/
+#define OP_PIC_FORMAT_UNKNOWN (-1)
+/**The MIME type indicates the image data is really a URL.*/
+#define OP_PIC_FORMAT_URL     (0)
+/**The image is a JPEG.*/
+#define OP_PIC_FORMAT_JPEG    (1)
+/**The image is a PNG.*/
+#define OP_PIC_FORMAT_PNG     (2)
+/**The image is a GIF.*/
+#define OP_PIC_FORMAT_GIF     (3)
+
+/*@}*/
+
+/**The contents of a METADATA_BLOCK_PICTURE tag.*/
+struct OpusPictureTag{
+  /**The picture type according to the ID3v2 APIC frame:
+     <ol start="0">
+     <li>Other</li>
+     <li>32x32 pixels 'file icon' (PNG only)</li>
+     <li>Other file icon</li>
+     <li>Cover (front)</li>
+     <li>Cover (back)</li>
+     <li>Leaflet page</li>
+     <li>Media (e.g. label side of CD)</li>
+     <li>Lead artist/lead performer/soloist</li>
+     <li>Artist/performer</li>
+     <li>Conductor</li>
+     <li>Band/Orchestra</li>
+     <li>Composer</li>
+     <li>Lyricist/text writer</li>
+     <li>Recording Location</li>
+     <li>During recording</li>
+     <li>During performance</li>
+     <li>Movie/video screen capture</li>
+     <li>A bright colored fish</li>
+     <li>Illustration</li>
+     <li>Band/artist logotype</li>
+     <li>Publisher/Studio logotype</li>
+     </ol>
+     Others are reserved and should not be used.
+     There may only be one each of picture type 1 and 2 in a file.*/
+  opus_int32     type;
+  /**The MIME type of the picture, in printable ASCII characters 0x20-0x7E.
+     The MIME type may also be <code>"-->"</code> to signify that the data part
+      is a URL pointing to the picture instead of the picture data itself.
+     In this case, a terminating NUL is appended to the URL string in #data,
+      but #data_length is set to the length of the string excluding that
+      terminating NUL.*/
+  char          *mime_type;
+  /**The description of the picture, in UTF-8.*/
+  char          *description;
+  /**The width of the picture in pixels.*/
+  opus_uint32    width;
+  /**The height of the picture in pixels.*/
+  opus_uint32    height;
+  /**The color depth of the picture in bits-per-pixel (<em>not</em>
+      bits-per-channel).*/
+  opus_uint32    depth;
+  /**For indexed-color pictures (e.g., GIF), the number of colors used, or 0
+      for non-indexed pictures.*/
+  opus_uint32    colors;
+  /**The length of the picture data in bytes.*/
+  opus_uint32    data_length;
+  /**The binary picture data.*/
+  unsigned char *data;
+  /**The format of the picture data, if known.
+     One of
+     <ul>
+     <li>#OP_PIC_FORMAT_UNKNOWN,</li>
+     <li>#OP_PIC_FORMAT_URL,</li>
+     <li>#OP_PIC_FORMAT_JPEG,</li>
+     <li>#OP_PIC_FORMAT_PNG, or</li>
+     <li>#OP_PIC_FORMAT_GIF.</li>
+     </ul>*/
+  int            format;
+};
+
+/**\name Functions for manipulating header data
+
+   These functions manipulate the #OpusHead and #OpusTags structures,
+    which describe the audio parameters and tag-value metadata, respectively.
+   These can be used to query the headers returned by <tt>libopusfile</tt>, or
+    to parse Opus headers from sources other than an Ogg Opus stream, provided
+    they use the same format.*/
+/*@{*/
+
+/**Parses the contents of the ID header packet of an Ogg Opus stream.
+   \param[out] _head Returns the contents of the parsed packet.
+                     The contents of this structure are untouched on error.
+                     This may be <code>NULL</code> to merely test the header
+                      for validity.
+   \param[in]  _data The contents of the ID header packet.
+   \param      _len  The number of bytes of data in the ID header packet.
+   \return 0 on success or a negative value on error.
+   \retval #OP_ENOTFORMAT If the data does not start with the "OpusHead"
+                           string.
+   \retval #OP_EVERSION   If the version field signaled a version this library
+                           does not know how to parse.
+   \retval #OP_EIMPL      If the channel mapping family was 255, which general
+                           purpose players should not attempt to play.
+   \retval #OP_EBADHEADER If the contents of the packet otherwise violate the
+                           Ogg Opus specification:
+                          <ul>
+                           <li>Insufficient data,</li>
+                           <li>Too much data for the known minor versions,</li>
+                           <li>An unrecognized channel mapping family,</li>
+                           <li>Zero channels or too many channels,</li>
+                           <li>Zero coded streams,</li>
+                           <li>Too many coupled streams, or</li>
+                           <li>An invalid channel mapping index.</li>
+                          </ul>*/
+OP_WARN_UNUSED_RESULT int opus_head_parse(OpusHead *_head,
+ const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2);
+
+/**Converts a granule position to a sample offset for a given Ogg Opus stream.
+   The sample offset is simply <code>_gp-_head->pre_skip</code>.
+   Granule position values smaller than OpusHead#pre_skip correspond to audio
+    that should never be played, and thus have no associated sample offset.
+   This function returns -1 for such values.
+   This function also correctly handles extremely large granule positions,
+    which may have wrapped around to a negative number when stored in a signed
+    ogg_int64_t value.
+   \param _head The #OpusHead information from the ID header of the stream.
+   \param _gp   The granule position to convert.
+   \return The sample offset associated with the given granule position
+            (counting at a 48 kHz sampling rate), or the special value -1 on
+            error (i.e., the granule position was smaller than the pre-skip
+            amount).*/
+ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp)
+ OP_ARG_NONNULL(1);
+
+/**Parses the contents of the 'comment' header packet of an Ogg Opus stream.
+   \param[out] _tags An uninitialized #OpusTags structure.
+                     This returns the contents of the parsed packet.
+                     The contents of this structure are untouched on error.
+                     This may be <code>NULL</code> to merely test the header
+                      for validity.
+   \param[in]  _data The contents of the 'comment' header packet.
+   \param      _len  The number of bytes of data in the 'info' header packet.
+   \retval 0              Success.
+   \retval #OP_ENOTFORMAT If the data does not start with the "OpusTags"
+                           string.
+   \retval #OP_EBADHEADER If the contents of the packet otherwise violate the
+                           Ogg Opus specification.
+   \retval #OP_EFAULT     If there wasn't enough memory to store the tags.*/
+OP_WARN_UNUSED_RESULT int opus_tags_parse(OpusTags *_tags,
+ const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2);
+
+/**Performs a deep copy of an #OpusTags structure.
+   \param _dst The #OpusTags structure to copy into.
+               If this function fails, the contents of this structure remain
+                untouched.
+   \param _src The #OpusTags structure to copy from.
+   \retval 0          Success.
+   \retval #OP_EFAULT If there wasn't enough memory to copy the tags.*/
+int opus_tags_copy(OpusTags *_dst,const OpusTags *_src) OP_ARG_NONNULL(1);
+
+/**Initializes an #OpusTags structure.
+   This should be called on a freshly allocated #OpusTags structure before
+    attempting to use it.
+   \param _tags The #OpusTags structure to initialize.*/
+void opus_tags_init(OpusTags *_tags) OP_ARG_NONNULL(1);
+
+/**Add a (tag, value) pair to an initialized #OpusTags structure.
+   \note Neither opus_tags_add() nor opus_tags_add_comment() support values
+    containing embedded NULs, although the bitstream format does support them.
+   To add such tags, you will need to manipulate the #OpusTags structure
+    directly.
+   \param _tags  The #OpusTags structure to add the (tag, value) pair to.
+   \param _tag   A NUL-terminated, case-insensitive, ASCII string containing
+                  the tag to add (without an '=' character).
+   \param _value A NUL-terminated UTF-8 containing the corresponding value.
+   \return 0 on success, or a negative value on failure.
+   \retval #OP_EFAULT An internal memory allocation failed.*/
+int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) OP_ARG_NONNULL(3);
+
+/**Add a comment to an initialized #OpusTags structure.
+   \note Neither opus_tags_add_comment() nor opus_tags_add() support comments
+    containing embedded NULs, although the bitstream format does support them.
+   To add such tags, you will need to manipulate the #OpusTags structure
+    directly.
+   \param _tags    The #OpusTags structure to add the comment to.
+   \param _comment A NUL-terminated UTF-8 string containing the comment in
+                    "TAG=value" form.
+   \return 0 on success, or a negative value on failure.
+   \retval #OP_EFAULT An internal memory allocation failed.*/
+int opus_tags_add_comment(OpusTags *_tags,const char *_comment)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Look up a comment value by its tag.
+   \param _tags  An initialized #OpusTags structure.
+   \param _tag   The tag to look up.
+   \param _count The instance of the tag.
+                 The same tag can appear multiple times, each with a distinct
+                  value, so an index is required to retrieve them all.
+                 The order in which these values appear is significant and
+                  should be preserved.
+                 Use opus_tags_query_count() to get the legal range for the
+                  \a _count parameter.
+   \return A pointer to the queried tag's value.
+           This points directly to data in the #OpusTags structure.
+           It should not be modified or freed by the application, and
+            modifications to the structure may invalidate the pointer.
+   \retval NULL If no matching tag is found.*/
+const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Look up the number of instances of a tag.
+   Call this first when querying for a specific tag and then iterate over the
+    number of instances with separate calls to opus_tags_query() to retrieve
+    all the values for that tag in order.
+   \param _tags An initialized #OpusTags structure.
+   \param _tag  The tag to look up.
+   \return The number of instances of this particular tag.*/
+int opus_tags_query_count(const OpusTags *_tags,const char *_tag)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Get the track gain from an R128_TRACK_GAIN tag, if one was specified.
+   This searches for the first R128_TRACK_GAIN tag with a valid signed,
+    16-bit decimal integer value and returns the value.
+   This routine is exposed merely for convenience for applications which wish
+    to do something special with the track gain (i.e., display it).
+   If you simply wish to apply the track gain instead of the header gain, you
+    can use op_set_gain_offset() with an #OP_TRACK_GAIN type and no offset.
+   \param      _tags    An initialized #OpusTags structure.
+   \param[out] _gain_q8 The track gain, in 1/256ths of a dB.
+                        This will lie in the range [-32768,32767], and should
+                         be applied in <em>addition</em> to the header gain.
+                        On error, no value is returned, and the previous
+                         contents remain unchanged.
+   \return 0 on success, or a negative value on error.
+   \retval #OP_FALSE There was no track gain available in the given tags.*/
+int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8)
+ OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Clears the #OpusTags structure.
+   This should be called on an #OpusTags structure after it is no longer
+    needed.
+   It will free all memory used by the structure members.
+   \param _tags The #OpusTags structure to clear.*/
+void opus_tags_clear(OpusTags *_tags) OP_ARG_NONNULL(1);
+
+/**Check if \a _comment is an instance of a \a _tag_name tag.
+   \see opus_tagncompare
+   \param _tag_name A NUL-terminated, case-insensitive, ASCII string containing
+                     the name of the tag to check for (without the terminating
+                     '=' character).
+   \param _comment  The comment string to check.
+   \return An integer less than, equal to, or greater than zero if \a _comment
+            is found respectively, to be less than, to match, or be greater
+            than a "tag=value" string whose tag matches \a _tag_name.*/
+int opus_tagcompare(const char *_tag_name,const char *_comment);
+
+/**Check if \a _comment is an instance of a \a _tag_name tag.
+   This version is slightly more efficient than opus_tagcompare() if the length
+    of the tag name is already known (e.g., because it is a constant).
+   \see opus_tagcompare
+   \param _tag_name A case-insensitive ASCII string containing the name of the
+                     tag to check for (without the terminating '=' character).
+   \param _tag_len  The number of characters in the tag name.
+                    This must be non-negative.
+   \param _comment  The comment string to check.
+   \return An integer less than, equal to, or greater than zero if \a _comment
+            is found respectively, to be less than, to match, or be greater
+            than a "tag=value" string whose tag matches the first \a _tag_len
+            characters of \a _tag_name.*/
+int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment);
+
+/**Parse a single METADATA_BLOCK_PICTURE tag.
+   This decodes the BASE64-encoded content of the tag and returns a structure
+    with the MIME type, description, image parameters (if known), and the
+    compressed image data.
+   If the MIME type indicates the presence of an image format we recognize
+    (JPEG, PNG, or GIF) and the actual image data contains the magic signature
+    associated with that format, then the OpusPictureTag::format field will be
+    set to the corresponding format.
+   This is provided as a convenience to avoid requiring applications to parse
+    the MIME type and/or do their own format detection for the commonly used
+    formats.
+   In this case, we also attempt to extract the image parameters directly from
+    the image data (overriding any that were present in the tag, which the
+    specification says applications are not meant to rely on).
+   The application must still provide its own support for actually decoding the
+    image data and, if applicable, retrieving that data from URLs.
+   \param[out] _pic Returns the parsed picture data.
+                    No sanitation is done on the type, MIME type, or
+                     description fields, so these might return invalid values.
+                    The contents of this structure are left unmodified on
+                     failure.
+   \param      _tag The METADATA_BLOCK_PICTURE tag contents.
+                    The leading "METADATA_BLOCK_PICTURE=" portion is optional,
+                     to allow the function to be used on either directly on the
+                     values in OpusTags::user_comments or on the return value
+                     of opus_tags_query().
+   \return 0 on success or a negative value on error.
+   \retval #OP_ENOTFORMAT The METADATA_BLOCK_PICTURE contents were not valid.
+   \retval #OP_EFAULT     There was not enough memory to store the picture tag
+                           contents.*/
+OP_WARN_UNUSED_RESULT int opus_picture_tag_parse(OpusPictureTag *_pic,
+ const char *_tag) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Initializes an #OpusPictureTag structure.
+   This should be called on a freshly allocated #OpusPictureTag structure
+    before attempting to use it.
+   \param _pic The #OpusPictureTag structure to initialize.*/
+void opus_picture_tag_init(OpusPictureTag *_pic) OP_ARG_NONNULL(1);
+
+/**Clears the #OpusPictureTag structure.
+   This should be called on an #OpusPictureTag structure after it is no longer
+    needed.
+   It will free all memory used by the structure members.
+   \param _pic The #OpusPictureTag structure to clear.*/
+void opus_picture_tag_clear(OpusPictureTag *_pic) OP_ARG_NONNULL(1);
+
+/*@}*/
+
+/*@}*/
+
+/**\defgroup url_options URL Reading Options*/
+/*@{*/
+/**\name URL reading options
+   Options for op_url_stream_create() and associated functions.
+   These allow you to provide proxy configuration parameters, skip SSL
+    certificate checks, etc.
+   Options are processed in order, and if the same option is passed multiple
+    times, only the value specified by the last occurrence has an effect
+    (unless otherwise specified).
+   They may be expanded in the future.*/
+/*@{*/
+
+/**@cond PRIVATE*/
+
+/*These are the raw numbers used to define the request codes.
+  They should not be used directly.*/
+#define OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST (6464)
+#define OP_HTTP_PROXY_HOST_REQUEST            (6528)
+#define OP_HTTP_PROXY_PORT_REQUEST            (6592)
+#define OP_HTTP_PROXY_USER_REQUEST            (6656)
+#define OP_HTTP_PROXY_PASS_REQUEST            (6720)
+#define OP_GET_SERVER_INFO_REQUEST            (6784)
+
+#define OP_URL_OPT(_request) ((_request)+(char *)0)
+
+/*These macros trigger compilation errors or warnings if the wrong types are
+   provided to one of the URL options.*/
+#define OP_CHECK_INT(_x) ((void)((_x)==(opus_int32)0),(opus_int32)(_x))
+#define OP_CHECK_CONST_CHAR_PTR(_x) ((_x)+((_x)-(const char *)(_x)))
+#define OP_CHECK_SERVER_INFO_PTR(_x) ((_x)+((_x)-(OpusServerInfo *)(_x)))
+
+/**@endcond*/
+
+/**HTTP/Shoutcast/Icecast server information associated with a URL.*/
+struct OpusServerInfo{
+  /**The name of the server (icy-name/ice-name).
+     This is <code>NULL</code> if there was no <code>icy-name</code> or
+      <code>ice-name</code> header.*/
+  char        *name;
+  /**A short description of the server (icy-description/ice-description).
+     This is <code>NULL</code> if there was no <code>icy-description</code> or
+      <code>ice-description</code> header.*/
+  char        *description;
+  /**The genre the server falls under (icy-genre/ice-genre).
+     This is <code>NULL</code> if there was no <code>icy-genre</code> or
+      <code>ice-genre</code> header.*/
+  char        *genre;
+  /**The homepage for the server (icy-url/ice-url).
+     This is <code>NULL</code> if there was no <code>icy-url</code> or
+      <code>ice-url</code> header.*/
+  char        *url;
+  /**The software used by the origin server (Server).
+     This is <code>NULL</code> if there was no <code>Server</code> header.*/
+  char        *server;
+  /**The media type of the entity sent to the recepient (Content-Type).
+     This is <code>NULL</code> if there was no <code>Content-Type</code>
+      header.*/
+  char        *content_type;
+  /**The nominal stream bitrate in kbps (icy-br/ice-bitrate).
+     This is <code>-1</code> if there was no <code>icy-br</code> or
+      <code>ice-bitrate</code> header.*/
+  opus_int32   bitrate_kbps;
+  /**Flag indicating whether the server is public (<code>1</code>) or not
+      (<code>0</code>) (icy-pub/ice-public).
+     This is <code>-1</code> if there was no <code>icy-pub</code> or
+      <code>ice-public</code> header.*/
+  int          is_public;
+  /**Flag indicating whether the server is using HTTPS instead of HTTP.
+     This is <code>0</code> unless HTTPS is being used.
+     This may not match the protocol used in the original URL if there were
+      redirections.*/
+  int          is_ssl;
+};
+
+/**Initializes an #OpusServerInfo structure.
+   All fields are set as if the corresponding header was not available.
+   \param _info The #OpusServerInfo structure to initialize.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.*/
+void opus_server_info_init(OpusServerInfo *_info) OP_ARG_NONNULL(1);
+
+/**Clears the #OpusServerInfo structure.
+   This should be called on an #OpusServerInfo structure after it is no longer
+    needed.
+   It will free all memory used by the structure members.
+   \param _info The #OpusServerInfo structure to clear.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.*/
+void opus_server_info_clear(OpusServerInfo *_info) OP_ARG_NONNULL(1);
+
+/**Skip the certificate check when connecting via TLS/SSL (https).
+   \param _b <code>opus_int32</code>: Whether or not to skip the certificate
+              check.
+             The check will be skipped if \a _b is non-zero, and will not be
+              skipped if \a _b is zero.
+   \hideinitializer*/
+#define OP_SSL_SKIP_CERTIFICATE_CHECK(_b) \
+ OP_URL_OPT(OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST),OP_CHECK_INT(_b)
+
+/**Proxy connections through the given host.
+   If no port is specified via #OP_HTTP_PROXY_PORT, the port number defaults
+    to 8080 (http-alt).
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _host <code>const char *</code>: The proxy server hostname.
+                This may be <code>NULL</code> to disable the use of a proxy
+                 server.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_HOST(_host) \
+ OP_URL_OPT(OP_HTTP_PROXY_HOST_REQUEST),OP_CHECK_CONST_CHAR_PTR(_host)
+
+/**Use the given port when proxying connections.
+   This option only has an effect if #OP_HTTP_PROXY_HOST is specified with a
+    non-<code>NULL</code> \a _host.
+   If this option is not provided, the proxy port number defaults to 8080
+    (http-alt).
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _port <code>opus_int32</code>: The proxy server port.
+                This must be in the range 0...65535 (inclusive), or the
+                 URL function this is passed to will fail.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_PORT(_port) \
+ OP_URL_OPT(OP_HTTP_PROXY_PORT_REQUEST),OP_CHECK_INT(_port)
+
+/**Use the given user name for authentication when proxying connections.
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _user const char *: The proxy server user name.
+                              This may be <code>NULL</code> to disable proxy
+                               authentication.
+                              A non-<code>NULL</code> value only has an effect
+                               if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_PASS
+                               are also specified with non-<code>NULL</code>
+                               arguments.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_USER(_user) \
+ OP_URL_OPT(OP_HTTP_PROXY_USER_REQUEST),OP_CHECK_CONST_CHAR_PTR(_user)
+
+/**Use the given password for authentication when proxying connections.
+   All proxy parameters are ignored for non-http and non-https URLs.
+   \param _pass const char *: The proxy server password.
+                              This may be <code>NULL</code> to disable proxy
+                               authentication.
+                              A non-<code>NULL</code> value only has an effect
+                               if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_USER
+                               are also specified with non-<code>NULL</code>
+                               arguments.
+   \hideinitializer*/
+#define OP_HTTP_PROXY_PASS(_pass) \
+ OP_URL_OPT(OP_HTTP_PROXY_PASS_REQUEST),OP_CHECK_CONST_CHAR_PTR(_pass)
+
+/**Parse information about the streaming server (if any) and return it.
+   Very little validation is done.
+   In particular, OpusServerInfo::url may not be a valid URL,
+    OpusServerInfo::bitrate_kbps may not really be in kbps, and
+    OpusServerInfo::content_type may not be a valid MIME type.
+   The character set of the string fields is not specified anywhere, and should
+    not be assumed to be valid UTF-8.
+   \param _info OpusServerInfo *: Returns information about the server.
+                                  If there is any error opening the stream, the
+                                   contents of this structure remain
+                                   unmodified.
+                                  On success, fills in the structure with the
+                                   server information that was available, if
+                                   any.
+                                  After a successful return, the contents of
+                                   this structure should be freed by calling
+                                   opus_server_info_clear().
+   \hideinitializer*/
+#define OP_GET_SERVER_INFO(_info) \
+ OP_URL_OPT(OP_GET_SERVER_INFO_REQUEST),OP_CHECK_SERVER_INFO_PTR(_info)
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_callbacks Abstract Stream Reading Interface*/
+/*@{*/
+/**\name Functions for reading from streams
+   These functions define the interface used to read from and seek in a stream
+    of data.
+   A stream does not need to implement seeking, but the decoder will not be
+    able to seek if it does not do so.
+   These functions also include some convenience routines for working with
+    standard <code>FILE</code> pointers, complete streams stored in a single
+    block of memory, or URLs.*/
+/*@{*/
+
+/**Reads up to \a _nbytes bytes of data from \a _stream.
+   \param      _stream The stream to read from.
+   \param[out] _ptr    The buffer to store the data in.
+   \param      _nbytes The maximum number of bytes to read.
+                       This function may return fewer, though it will not
+                        return zero unless it reaches end-of-file.
+   \return The number of bytes successfully read, or a negative value on
+            error.*/
+typedef int (*op_read_func)(void *_stream,unsigned char *_ptr,int _nbytes);
+
+/**Sets the position indicator for \a _stream.
+   The new position, measured in bytes, is obtained by adding \a _offset
+    bytes to the position specified by \a _whence.
+   If \a _whence is set to <code>SEEK_SET</code>, <code>SEEK_CUR</code>, or
+    <code>SEEK_END</code>, the offset is relative to the start of the stream,
+    the current position indicator, or end-of-file, respectively.
+   \retval 0  Success.
+   \retval -1 Seeking is not supported or an error occurred.
+              <code>errno</code> need not be set.*/
+typedef int (*op_seek_func)(void *_stream,opus_int64 _offset,int _whence);
+
+/**Obtains the current value of the position indicator for \a _stream.
+   \return The current position indicator.*/
+typedef opus_int64 (*op_tell_func)(void *_stream);
+
+/**Closes the underlying stream.
+   \retval 0   Success.
+   \retval EOF An error occurred.
+               <code>errno</code> need not be set.*/
+typedef int (*op_close_func)(void *_stream);
+
+/**The callbacks used to access non-<code>FILE</code> stream resources.
+   The function prototypes are basically the same as for the stdio functions
+    <code>fread()</code>, <code>fseek()</code>, <code>ftell()</code>, and
+    <code>fclose()</code>.
+   The differences are that the <code>FILE *</code> arguments have been
+    replaced with a <code>void *</code>, which is to be used as a pointer to
+    whatever internal data these functions might need, that #seek and #tell
+    take and return 64-bit offsets, and that #seek <em>must</em> return -1 if
+    the stream is unseekable.*/
+struct OpusFileCallbacks{
+  /**Used to read data from the stream.
+     This must not be <code>NULL</code>.*/
+  op_read_func  read;
+  /**Used to seek in the stream.
+     This may be <code>NULL</code> if seeking is not implemented.*/
+  op_seek_func  seek;
+  /**Used to return the current read position in the stream.
+     This may be <code>NULL</code> if seeking is not implemented.*/
+  op_tell_func  tell;
+  /**Used to close the stream when the decoder is freed.
+     This may be <code>NULL</code> to leave the stream open.*/
+  op_close_func close;
+};
+
+/**Opens a stream with <code>fopen()</code> and fills in a set of callbacks
+    that can be used to access it.
+   This is useful to avoid writing your own portable 64-bit seeking wrappers,
+    and also avoids cross-module linking issues on Windows, where a
+    <code>FILE *</code> must be accessed by routines defined in the same module
+    that opened it.
+   \param[out] _cb   The callbacks to use for this file.
+                     If there is an error opening the file, nothing will be
+                      filled in here.
+   \param      _path The path to the file to open.
+                     On Windows, this string must be UTF-8 (to allow access to
+                      files whose names cannot be represented in the current
+                      MBCS code page).
+                     All other systems use the native character encoding.
+   \param      _mode The mode to open the file in.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_fopen(OpusFileCallbacks *_cb,
+ const char *_path,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2)
+ OP_ARG_NONNULL(3);
+
+/**Opens a stream with <code>fdopen()</code> and fills in a set of callbacks
+    that can be used to access it.
+   This is useful to avoid writing your own portable 64-bit seeking wrappers,
+    and also avoids cross-module linking issues on Windows, where a
+    <code>FILE *</code> must be accessed by routines defined in the same module
+    that opened it.
+   \param[out] _cb   The callbacks to use for this file.
+                     If there is an error opening the file, nothing will be
+                      filled in here.
+   \param      _fd   The file descriptor to open.
+   \param      _mode The mode to open the file in.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_fdopen(OpusFileCallbacks *_cb,
+ int _fd,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(3);
+
+/**Opens a stream with <code>freopen()</code> and fills in a set of callbacks
+    that can be used to access it.
+   This is useful to avoid writing your own portable 64-bit seeking wrappers,
+    and also avoids cross-module linking issues on Windows, where a
+    <code>FILE *</code> must be accessed by routines defined in the same module
+    that opened it.
+   \param[out] _cb     The callbacks to use for this file.
+                       If there is an error opening the file, nothing will be
+                        filled in here.
+   \param      _path   The path to the file to open.
+                       On Windows, this string must be UTF-8 (to allow access
+                        to files whose names cannot be represented in the
+                        current MBCS code page).
+                       All other systems use the native character encoding.
+   \param      _mode   The mode to open the file in.
+   \param      _stream A stream previously returned by op_fopen(), op_fdopen(),
+                        or op_freopen().
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_freopen(OpusFileCallbacks *_cb,
+ const char *_path,const char *_mode,void *_stream) OP_ARG_NONNULL(1)
+ OP_ARG_NONNULL(2) OP_ARG_NONNULL(3) OP_ARG_NONNULL(4);
+
+/**Creates a stream that reads from the given block of memory.
+   This block of memory must contain the complete stream to decode.
+   This is useful for caching small streams (e.g., sound effects) in RAM.
+   \param[out] _cb   The callbacks to use for this stream.
+                     If there is an error creating the stream, nothing will be
+                      filled in here.
+   \param      _data The block of memory to read from.
+   \param      _size The size of the block of memory.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_mem_stream_create(OpusFileCallbacks *_cb,
+ const unsigned char *_data,size_t _size) OP_ARG_NONNULL(1);
+
+/**Creates a stream that reads from the given URL.
+   This function behaves identically to op_url_stream_create(), except that it
+    takes a va_list instead of a variable number of arguments.
+   It does not call the <code>va_end</code> macro, and because it invokes the
+    <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param[out]    _cb  The callbacks to use for this stream.
+                       If there is an error creating the stream, nothing will
+                        be filled in here.
+   \param         _url The URL to read from.
+                       Currently only the <file:>, <http:>, and <https:>
+                        schemes are supported.
+                       Both <http:> and <https:> may be disabled at compile
+                        time, in which case opening such URLs will always fail.
+                       Currently this only supports URIs.
+                       IRIs should be converted to UTF-8 and URL-escaped, with
+                        internationalized domain names encoded in punycode,
+                        before passing them to this function.
+   \param[in,out] _ap  A list of the \ref url_options "optional flags" to use.
+                       This is a variable-length list of options terminated
+                        with <code>NULL</code>.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_url_stream_vcreate(OpusFileCallbacks *_cb,
+ const char *_url,va_list _ap) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/**Creates a stream that reads from the given URL.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param[out] _cb  The callbacks to use for this stream.
+                    If there is an error creating the stream, nothing will be
+                     filled in here.
+   \param      _url The URL to read from.
+                    Currently only the <file:>, <http:>, and <https:> schemes
+                     are supported.
+                    Both <http:> and <https:> may be disabled at compile time,
+                     in which case opening such URLs will always fail.
+                    Currently this only supports URIs.
+                    IRIs should be converted to UTF-8 and URL-escaped, with
+                     internationalized domain names encoded in punycode, before
+                     passing them to this function.
+   \param      ...  The \ref url_options "optional flags" to use.
+                    This is a variable-length list of options terminated with
+                     <code>NULL</code>.
+   \return A stream handle to use with the callbacks, or <code>NULL</code> on
+            error.*/
+OP_WARN_UNUSED_RESULT void *op_url_stream_create(OpusFileCallbacks *_cb,
+ const char *_url,...) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_open_close Opening and Closing*/
+/*@{*/
+/**\name Functions for opening and closing streams
+
+   These functions allow you to test a stream to see if it is Opus, open it,
+    and close it.
+   Several flavors are provided for each of the built-in stream types, plus a
+    more general version which takes a set of application-provided callbacks.*/
+/*@{*/
+
+/**Test to see if this is an Opus stream.
+   For good results, you will need at least 57 bytes (for a pure Opus-only
+    stream).
+   Something like 512 bytes will give more reliable results for multiplexed
+    streams.
+   This function is meant to be a quick-rejection filter.
+   Its purpose is not to guarantee that a stream is a valid Opus stream, but to
+    ensure that it looks enough like Opus that it isn't going to be recognized
+    as some other format (except possibly an Opus stream that is also
+    multiplexed with other codecs, such as video).
+   \param[out] _head     The parsed ID header contents.
+                         You may pass <code>NULL</code> if you do not need
+                          this information.
+                         If the function fails, the contents of this structure
+                          remain untouched.
+   \param _initial_data  An initial buffer of data from the start of the
+                          stream.
+   \param _initial_bytes The number of bytes in \a _initial_data.
+   \return 0 if the data appears to be Opus, or a negative value on error.
+   \retval #OP_FALSE      There was not enough data to tell if this was an Opus
+                           stream or not.
+   \retval #OP_EFAULT     An internal memory allocation failed.
+   \retval #OP_EIMPL      The stream used a feature that is not implemented,
+                           such as an unsupported channel family.
+   \retval #OP_ENOTFORMAT If the data did not contain a recognizable ID
+                           header for an Opus stream.
+   \retval #OP_EVERSION   If the version field signaled a version this library
+                           does not know how to parse.
+   \retval #OP_EBADHEADER The ID header was not properly formatted or contained
+                           illegal values.*/
+int op_test(OpusHead *_head,
+ const unsigned char *_initial_data,size_t _initial_bytes);
+
+/**Open a stream from the given file path.
+   \param      _path  The path to the file to open.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      The failure code will be #OP_EFAULT if the file could not
+                       be opened, or one of the other failure codes from
+                       op_open_callbacks() otherwise.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_file(const char *_path,int *_error)
+ OP_ARG_NONNULL(1);
+
+/**Open a stream from a memory buffer.
+   \param      _data  The memory buffer to open.
+   \param      _size  The number of bytes in the buffer.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      See op_open_callbacks() for a full list of failure codes.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_memory(const unsigned char *_data,
+ size_t _size,int *_error);
+
+/**Open a stream from a URL.
+   This function behaves identically to op_open_url(), except that it
+    takes a va_list instead of a variable number of arguments.
+   It does not call the <code>va_end</code> macro, and because it invokes the
+    <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param         _url   The URL to open.
+                         Currently only the <file:>, <http:>, and <https:>
+                          schemes are supported.
+                         Both <http:> and <https:> may be disabled at compile
+                          time, in which case opening such URLs will always
+                          fail.
+                         Currently this only supports URIs.
+                         IRIs should be converted to UTF-8 and URL-escaped,
+                          with internationalized domain names encoded in
+                          punycode, before passing them to this function.
+   \param[out]    _error Returns 0 on success, or a failure code on error.
+                         You may pass in <code>NULL</code> if you don't want
+                          the failure code.
+                         See op_open_callbacks() for a full list of failure
+                          codes.
+   \param[in,out] _ap    A list of the \ref url_options "optional flags" to
+                          use.
+                         This is a variable-length list of options terminated
+                          with <code>NULL</code>.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_vopen_url(const char *_url,
+ int *_error,va_list _ap) OP_ARG_NONNULL(1);
+
+/**Open a stream from a URL.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \param      _url   The URL to open.
+                      Currently only the <file:>, <http:>, and <https:> schemes
+                       are supported.
+                      Both <http:> and <https:> may be disabled at compile
+                       time, in which case opening such URLs will always fail.
+                      Currently this only supports URIs.
+                      IRIs should be converted to UTF-8 and URL-escaped, with
+                       internationalized domain names encoded in punycode,
+                       before passing them to this function.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      See op_open_callbacks() for a full list of failure codes.
+   \param      ...    The \ref url_options "optional flags" to use.
+                      This is a variable-length list of options terminated with
+                       <code>NULL</code>.
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url,
+ int *_error,...) OP_ARG_NONNULL(1);
+
+/**Open a stream using the given set of callbacks to access it.
+   \param _source        The stream to read from (e.g., a <code>FILE *</code>).
+   \param _cb            The callbacks with which to access the stream.
+                         <code><a href="#op_read_func">read()</a></code> must
+                          be implemented.
+                         <code><a href="#op_seek_func">seek()</a></code> and
+                          <code><a href="#op_tell_func">tell()</a></code> may
+                          be <code>NULL</code>, or may always return -1 to
+                          indicate a source is unseekable, but if
+                          <code><a href="#op_seek_func">seek()</a></code> is
+                          implemented and succeeds on a particular source, then
+                          <code><a href="#op_tell_func">tell()</a></code> must
+                          also.
+                         <code><a href="#op_close_func">close()</a></code> may
+                          be <code>NULL</code>, but if it is not, it will be
+                          called when the \c OggOpusFile is destroyed by
+                          op_free().
+                         It will not be called if op_open_callbacks() fails
+                          with an error.
+   \param _initial_data  An initial buffer of data from the start of the
+                          stream.
+                         Applications can read some number of bytes from the
+                          start of the stream to help identify this as an Opus
+                          stream, and then provide them here to allow the
+                          stream to be opened, even if it is unseekable.
+   \param _initial_bytes The number of bytes in \a _initial_data.
+                         If the stream is seekable, its current position (as
+                          reported by
+                          <code><a href="#opus_tell_func">tell()</a></code>
+                          at the start of this function) must be equal to
+                          \a _initial_bytes.
+                         Otherwise, seeking to absolute positions will
+                          generate inconsistent results.
+   \param[out] _error    Returns 0 on success, or a failure code on error.
+                         You may pass in <code>NULL</code> if you don't want
+                          the failure code.
+                         The failure code will be one of
+                         <dl>
+                           <dt>#OP_EREAD</dt>
+                           <dd>An underlying read, seek, or tell operation
+                            failed when it should have succeeded, or we failed
+                            to find data in the stream we had seen before.</dd>
+                           <dt>#OP_EFAULT</dt>
+                           <dd>There was a memory allocation failure, or an
+                            internal library error.</dd>
+                           <dt>#OP_EIMPL</dt>
+                           <dd>The stream used a feature that is not
+                            implemented, such as an unsupported channel
+                            family.</dd>
+                           <dt>#OP_EINVAL</dt>
+                           <dd><code><a href="#op_seek_func">seek()</a></code>
+                            was implemented and succeeded on this source, but
+                            <code><a href="#op_tell_func">tell()</a></code>
+                            did not, or the starting position indicator was
+                            not equal to \a _initial_bytes.</dd>
+                           <dt>#OP_ENOTFORMAT</dt>
+                           <dd>The stream contained a link that did not have
+                            any logical Opus streams in it.</dd>
+                           <dt>#OP_EBADHEADER</dt>
+                           <dd>A required header packet was not properly
+                            formatted, contained illegal values, or was missing
+                            altogether.</dd>
+                           <dt>#OP_EVERSION</dt>
+                           <dd>An ID header contained an unrecognized version
+                            number.</dd>
+                           <dt>#OP_EBADLINK</dt>
+                           <dd>We failed to find data we had seen before after
+                            seeking.</dd>
+                           <dt>#OP_EBADTIMESTAMP</dt>
+                           <dd>The first or last timestamp in a link failed
+                            basic validity checks.</dd>
+                         </dl>
+   \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.
+           <tt>libopusfile</tt> does <em>not</em> take ownership of the source
+            if the call fails.
+           The calling application is responsible for closing the source if
+            this call returns an error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_source,
+ const OpusFileCallbacks *_cb,const unsigned char *_initial_data,
+ size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2);
+
+/**Partially open a stream from the given file path.
+   \see op_test_callbacks
+   \param      _path  The path to the file to open.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      The failure code will be #OP_EFAULT if the file could not
+                       be opened, or one of the other failure codes from
+                       op_open_callbacks() otherwise.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_file(const char *_path,int *_error)
+ OP_ARG_NONNULL(1);
+
+/**Partially open a stream from a memory buffer.
+   \see op_test_callbacks
+   \param      _data  The memory buffer to open.
+   \param      _size  The number of bytes in the buffer.
+   \param[out] _error Returns 0 on success, or a failure code on error.
+                      You may pass in <code>NULL</code> if you don't want the
+                       failure code.
+                      See op_open_callbacks() for a full list of failure codes.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_memory(const unsigned char *_data,
+ size_t _size,int *_error);
+
+/**Partially open a stream from a URL.
+   This function behaves identically to op_test_url(), except that it
+    takes a va_list instead of a variable number of arguments.
+   It does not call the <code>va_end</code> macro, and because it invokes the
+    <code>va_arg</code> macro, the value of \a _ap is undefined after the call.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \see op_test_url
+   \see op_test_callbacks
+   \param         _url    The URL to open.
+                          Currently only the <file:>, <http:>, and <https:>
+                           schemes are supported.
+                          Both <http:> and <https:> may be disabled at compile
+                           time, in which case opening such URLs will always
+                           fail.
+                          Currently this only supports URIs.
+                          IRIs should be converted to UTF-8 and URL-escaped,
+                           with internationalized domain names encoded in
+                           punycode, before passing them to this function.
+   \param[out]    _error  Returns 0 on success, or a failure code on error.
+                          You may pass in <code>NULL</code> if you don't want
+                           the failure code.
+                          See op_open_callbacks() for a full list of failure
+                           codes.
+   \param[in,out] _ap     A list of the \ref url_options "optional flags" to
+                           use.
+                          This is a variable-length list of options terminated
+                           with <code>NULL</code>.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_vtest_url(const char *_url,
+ int *_error,va_list _ap) OP_ARG_NONNULL(1);
+
+/**Partially open a stream from a URL.
+   \note If you use this function, you must link against <tt>libopusurl</tt>.
+   \see op_test_callbacks
+   \param      _url    The URL to open.
+                       Currently only the <file:>, <http:>, and <https:>
+                        schemes are supported.
+                       Both <http:> and <https:> may be disabled at compile
+                        time, in which case opening such URLs will always fail.
+                       Currently this only supports URIs.
+                       IRIs should be converted to UTF-8 and URL-escaped, with
+                        internationalized domain names encoded in punycode,
+                        before passing them to this function.
+   \param[out] _error  Returns 0 on success, or a failure code on error.
+                       You may pass in <code>NULL</code> if you don't want the
+                        failure code.
+                       See op_open_callbacks() for a full list of failure
+                        codes.
+   \param      ...     The \ref url_options "optional flags" to use.
+                       This is a variable-length list of options terminated
+                        with <code>NULL</code>.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url,
+ int *_error,...) OP_ARG_NONNULL(1);
+
+/**Partially open a stream using the given set of callbacks to access it.
+   This tests for Opusness and loads the headers for the first link.
+   It does not seek (although it tests for seekability).
+   You can query a partially open stream for the few pieces of basic
+    information returned by op_serialno(), op_channel_count(), op_head(), and
+    op_tags() (but only for the first link).
+   You may also determine if it is seekable via a call to op_seekable().
+   You cannot read audio from the stream, seek, get the size or duration,
+    get information from links other than the first one, or even get the total
+    number of links until you finish opening the stream with op_test_open().
+   If you do not need to do any of these things, you can dispose of it with
+    op_free() instead.
+
+   This function is provided mostly to simplify porting existing code that used
+    <tt>libvorbisfile</tt>.
+   For new code, you are likely better off using op_test() instead, which
+    is less resource-intensive, requires less data to succeed, and imposes a
+    hard limit on the amount of data it examines (important for unseekable
+    sources, where all such data must be buffered until you are sure of the
+    stream type).
+   \param _source        The stream to read from (e.g., a <code>FILE *</code>).
+   \param _cb            The callbacks with which to access the stream.
+                         <code><a href="#op_read_func">read()</a></code> must
+                          be implemented.
+                         <code><a href="#op_seek_func">seek()</a></code> and
+                          <code><a href="#op_tell_func">tell()</a></code> may
+                          be <code>NULL</code>, or may always return -1 to
+                          indicate a source is unseekable, but if
+                          <code><a href="#op_seek_func">seek()</a></code> is
+                          implemented and succeeds on a particular source, then
+                          <code><a href="#op_tell_func">tell()</a></code> must
+                          also.
+                         <code><a href="#op_close_func">close()</a></code> may
+                          be <code>NULL</code>, but if it is not, it will be
+                          called when the \c OggOpusFile is destroyed by
+                          op_free().
+                         It will not be called if op_open_callbacks() fails
+                          with an error.
+   \param _initial_data  An initial buffer of data from the start of the
+                          stream.
+                         Applications can read some number of bytes from the
+                          start of the stream to help identify this as an Opus
+                          stream, and then provide them here to allow the
+                          stream to be tested more thoroughly, even if it is
+                          unseekable.
+   \param _initial_bytes The number of bytes in \a _initial_data.
+                         If the stream is seekable, its current position (as
+                          reported by
+                          <code><a href="#opus_tell_func">tell()</a></code>
+                          at the start of this function) must be equal to
+                          \a _initial_bytes.
+                         Otherwise, seeking to absolute positions will
+                          generate inconsistent results.
+   \param[out] _error    Returns 0 on success, or a failure code on error.
+                         You may pass in <code>NULL</code> if you don't want
+                          the failure code.
+                         See op_open_callbacks() for a full list of failure
+                          codes.
+   \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.
+           <tt>libopusfile</tt> does <em>not</em> take ownership of the source
+            if the call fails.
+           The calling application is responsible for closing the source if
+            this call returns an error.*/
+OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_source,
+ const OpusFileCallbacks *_cb,const unsigned char *_initial_data,
+ size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2);
+
+/**Finish opening a stream partially opened with op_test_callbacks() or one of
+    the associated convenience functions.
+   If this function fails, you are still responsible for freeing the
+    \c OggOpusFile with op_free().
+   \param _of The \c OggOpusFile to finish opening.
+   \return 0 on success, or a negative value on error.
+   \retval #OP_EREAD         An underlying read, seek, or tell operation failed
+                              when it should have succeeded.
+   \retval #OP_EFAULT        There was a memory allocation failure, or an
+                              internal library error.
+   \retval #OP_EIMPL         The stream used a feature that is not implemented,
+                              such as an unsupported channel family.
+   \retval #OP_EINVAL        The stream was not partially opened with
+                              op_test_callbacks() or one of the associated
+                              convenience functions.
+   \retval #OP_ENOTFORMAT    The stream contained a link that did not have any
+                              logical Opus streams in it.
+   \retval #OP_EBADHEADER    A required header packet was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An ID header contained an unrecognized version
+                              number.
+   \retval #OP_EBADLINK      We failed to find data we had seen before after
+                              seeking.
+   \retval #OP_EBADTIMESTAMP The first or last timestamp in a link failed basic
+                              validity checks.*/
+int op_test_open(OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Release all memory used by an \c OggOpusFile.
+   \param _of The \c OggOpusFile to free.*/
+void op_free(OggOpusFile *_of);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_info Stream Information*/
+/*@{*/
+/**\name Functions for obtaining information about streams
+
+   These functions allow you to get basic information about a stream, including
+    seekability, the number of links (for chained streams), plus the size,
+    duration, bitrate, header parameters, and meta information for each link
+    (or, where available, the stream as a whole).
+   Some of these (size, duration) are only available for seekable streams.
+   You can also query the current stream position, link, and playback time,
+    and instantaneous bitrate during playback.
+
+   Some of these functions may be used successfully on the partially open
+    streams returned by op_test_callbacks() or one of the associated
+    convenience functions.
+   Their documention will indicate so explicitly.*/
+/*@{*/
+
+/**Returns whether or not the data source being read is seekable.
+   This is true if
+   <ol>
+   <li>The <code><a href="#op_seek_func">seek()</a></code> and
+    <code><a href="#op_tell_func">tell()</a></code> callbacks are both
+    non-<code>NULL</code>,</li>
+   <li>The <code><a href="#op_seek_func">seek()</a></code> callback was
+    successfully executed at least once, and</li>
+   <li>The <code><a href="#op_tell_func">tell()</a></code> callback was
+    successfully able to report the position indicator afterwards.</li>
+   </ol>
+   This function may be called on partially-opened streams.
+   \param _of The \c OggOpusFile whose seekable status is to be returned.
+   \return A non-zero value if seekable, and 0 if unseekable.*/
+int op_seekable(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Returns the number of links in this chained stream.
+   This function may be called on partially-opened streams, but it will always
+    return 1.
+   The actual number of links is not known until the stream is fully opened.
+   \param _of The \c OggOpusFile from which to retrieve the link count.
+   \return For fully-open seekable sources, this returns the total number of
+            links in the whole stream, which will be at least 1.
+           For partially-open or unseekable sources, this always returns 1.*/
+int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Get the serial number of the given link in a (possibly-chained) Ogg Opus
+    stream.
+   This function may be called on partially-opened streams, but it will always
+    return the serial number of the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the serial number.
+   \param _li The index of the link whose serial number should be retrieved.
+              Use a negative number to get the serial number of the current
+               link.
+   \return The serial number of the given link.
+           If \a _li is greater than the total number of links, this returns
+            the serial number of the last link.
+           If the source is not seekable, this always returns the serial number
+            of the current link.*/
+opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the channel count of the given link in a (possibly-chained) Ogg Opus
+    stream.
+   This is equivalent to <code>op_head(_of,_li)->channel_count</code>, but
+    is provided for convenience.
+   This function may be called on partially-opened streams, but it will always
+    return the channel count of the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the channel count.
+   \param _li The index of the link whose channel count should be retrieved.
+              Use a negative number to get the channel count of the current
+               link.
+   \return The channel count of the given link.
+           If \a _li is greater than the total number of links, this returns
+            the channel count of the last link.
+           If the source is not seekable, this always returns the channel count
+            of the current link.*/
+int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the total (compressed) size of the stream, or of an individual link in
+    a (possibly-chained) Ogg Opus stream, including all headers and Ogg muxing
+    overhead.
+   \warning If the Opus stream (or link) is concurrently multiplexed with other
+    logical streams (e.g., video), this returns the size of the entire stream
+    (or link), not just the number of bytes in the first logical Opus stream.
+   Returning the latter would require scanning the entire file.
+   \param _of The \c OggOpusFile from which to retrieve the compressed size.
+   \param _li The index of the link whose compressed size should be computed.
+              Use a negative number to get the compressed size of the entire
+               stream.
+   \return The compressed size of the entire stream if \a _li is negative, the
+            compressed size of link \a _li if it is non-negative, or a negative
+            value on error.
+           The compressed size of the entire stream may be smaller than that
+            of the underlying source if trailing garbage was detected in the
+            file.
+   \retval #OP_EINVAL The source is not seekable (so we can't know the length),
+                       \a _li wasn't less than the total number of links in
+                       the stream, or the stream was only partially open.*/
+opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the total PCM length (number of samples at 48 kHz) of the stream, or of
+    an individual link in a (possibly-chained) Ogg Opus stream.
+   Users looking for <code>op_time_total()</code> should use op_pcm_total()
+    instead.
+   Because timestamps in Opus are fixed at 48 kHz, there is no need for a
+    separate function to convert this to seconds (and leaving it out avoids
+    introducing floating point to the API, for those that wish to avoid it).
+   \param _of The \c OggOpusFile from which to retrieve the PCM offset.
+   \param _li The index of the link whose PCM length should be computed.
+              Use a negative number to get the PCM length of the entire stream.
+   \return The PCM length of the entire stream if \a _li is negative, the PCM
+            length of link \a _li if it is non-negative, or a negative value on
+            error.
+   \retval #OP_EINVAL The source is not seekable (so we can't know the length),
+                       \a _li wasn't less than the total number of links in
+                       the stream, or the stream was only partially open.*/
+ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the ID header information for the given link in a (possibly chained) Ogg
+    Opus stream.
+   This function may be called on partially-opened streams, but it will always
+    return the ID header information of the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the ID header
+               information.
+   \param _li The index of the link whose ID header information should be
+               retrieved.
+              Use a negative number to get the ID header information of the
+               current link.
+              For an unseekable stream, \a _li is ignored, and the ID header
+               information for the current link is always returned, if
+               available.
+   \return The contents of the ID header for the given link.*/
+const OpusHead *op_head(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Get the comment header information for the given link in a (possibly
+    chained) Ogg Opus stream.
+   This function may be called on partially-opened streams, but it will always
+    return the tags from the Opus stream in the first link.
+   \param _of The \c OggOpusFile from which to retrieve the comment header
+               information.
+   \param _li The index of the link whose comment header information should be
+               retrieved.
+              Use a negative number to get the comment header information of
+               the current link.
+              For an unseekable stream, \a _li is ignored, and the comment
+               header information for the current link is always returned, if
+               available.
+   \return The contents of the comment header for the given link, or
+            <code>NULL</code> if this is an unseekable stream that encountered
+            an invalid link.*/
+const OpusTags *op_tags(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Retrieve the index of the current link.
+   This is the link that produced the data most recently read by
+    op_read_float() or its associated functions, or, after a seek, the link
+    that the seek target landed in.
+   Reading more data may advance the link index (even on the first read after a
+    seek).
+   \param _of The \c OggOpusFile from which to retrieve the current link index.
+   \return The index of the current link on success, or a negative value on
+            failure.
+           For seekable streams, this is a number between 0 and the value
+            returned by op_link_count().
+           For unseekable streams, this value starts at 0 and increments by one
+            each time a new link is encountered (even though op_link_count()
+            always returns 1).
+   \retval #OP_EINVAL The stream was only partially open.*/
+int op_current_link(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Computes the bitrate of the stream, or of an individual link in a
+    (possibly-chained) Ogg Opus stream.
+   The stream must be seekable to compute the bitrate.
+   For unseekable streams, use op_bitrate_instant() to get periodic estimates.
+   \warning If the Opus stream (or link) is concurrently multiplexed with other
+    logical streams (e.g., video), this uses the size of the entire stream (or
+    link) to compute the bitrate, not just the number of bytes in the first
+    logical Opus stream.
+   Returning the latter requires scanning the entire file, but this may be done
+    by decoding the whole file and calling op_bitrate_instant() once at the
+    end.
+   Install a trivial decoding callback with op_set_decode_callback() if you
+    wish to skip actual decoding during this process.
+   \param _of The \c OggOpusFile from which to retrieve the bitrate.
+   \param _li The index of the link whose bitrate should be computed.
+              Use a negative number to get the bitrate of the whole stream.
+   \return The bitrate on success, or a negative value on error.
+   \retval #OP_EINVAL The stream was only partially open, the stream was not
+                       seekable, or \a _li was larger than the number of
+                       links.*/
+opus_int32 op_bitrate(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1);
+
+/**Compute the instantaneous bitrate, measured as the ratio of bits to playable
+    samples decoded since a) the last call to op_bitrate_instant(), b) the last
+    seek, or c) the start of playback, whichever was most recent.
+   This will spike somewhat after a seek or at the start/end of a chain
+    boundary, as pre-skip, pre-roll, and end-trimming causes samples to be
+    decoded but not played.
+   \param _of The \c OggOpusFile from which to retrieve the bitrate.
+   \return The bitrate, in bits per second, or a negative value on error.
+   \retval #OP_FALSE  No data has been decoded since any of the events
+                       described above.
+   \retval #OP_EINVAL The stream was only partially open.*/
+opus_int32 op_bitrate_instant(OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Obtain the current value of the position indicator for \a _of.
+   \param _of The \c OggOpusFile from which to retrieve the position indicator.
+   \return The byte position that is currently being read from.
+   \retval #OP_EINVAL The stream was only partially open.*/
+opus_int64 op_raw_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/**Obtain the PCM offset of the next sample to be read.
+   If the stream is not properly timestamped, this might not increment by the
+    proper amount between reads, or even return monotonically increasing
+    values.
+   \param _of The \c OggOpusFile from which to retrieve the PCM offset.
+   \return The PCM offset of the next sample to be read.
+   \retval #OP_EINVAL The stream was only partially open.*/
+ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_seeking Seeking*/
+/*@{*/
+/**\name Functions for seeking in Opus streams
+
+   These functions let you seek in Opus streams, if the underlying source
+    support it.
+   Seeking is implemented for all built-in stream I/O routines, though some
+    individual sources may not be seekable (pipes, live HTTP streams, or HTTP
+    streams from a server that does not support <code>Range</code> requests).
+
+   op_raw_seek() is the fastest: it is guaranteed to perform at most one
+    physical seek, but, since the target is a byte position, makes no guarantee
+    how close to a given time it will come.
+   op_pcm_seek() provides sample-accurate seeking.
+   The number of physical seeks it requires is still quite small (often 1 or
+    2, even in highly variable bitrate streams).
+
+   Seeking in Opus requires decoding some pre-roll amount before playback to
+    allow the internal state to converge (as if recovering from packet loss).
+   This is handled internally by <tt>libopusfile</tt>, but means there is
+    little extra overhead for decoding up to the exact position requested
+    (since it must decode some amount of audio anyway).
+   It also means that decoding after seeking may not return exactly the same
+    values as would be obtained by decoding the stream straight through.
+   However, such differences are expected to be smaller than the loss
+    introduced by Opus's lossy compression.*/
+/*@{*/
+
+/**Seek to a byte offset relative to the <b>compressed</b> data.
+   This also scans packets to update the PCM cursor.
+   It will cross a logical bitstream boundary, but only if it can't get any
+    packets out of the tail of the link to which it seeks.
+   \param _of          The \c OggOpusFile in which to seek.
+   \param _byte_offset The byte position to seek to.
+   \return 0 on success, or a negative error code on failure.
+   \retval #OP_EREAD    The underlying seek operation failed.
+   \retval #OP_EINVAL   The stream was only partially open, or the target was
+                         outside the valid range for the stream.
+   \retval #OP_ENOSEEK  This stream is not seekable.
+   \retval #OP_EBADLINK Failed to initialize a decoder for a stream for an
+                         unknown reason.*/
+int op_raw_seek(OggOpusFile *_of,opus_int64 _byte_offset) OP_ARG_NONNULL(1);
+
+/**Seek to the specified PCM offset, such that decoding will begin at exactly
+    the requested position.
+   \param _of         The \c OggOpusFile in which to seek.
+   \param _pcm_offset The PCM offset to seek to.
+                      This is in samples at 48 kHz relative to the start of the
+                       stream.
+   \return 0 on success, or a negative value on error.
+   \retval #OP_EREAD    An underlying read or seek operation failed.
+   \retval #OP_EINVAL   The stream was only partially open, or the target was
+                         outside the valid range for the stream.
+   \retval #OP_ENOSEEK  This stream is not seekable.
+   \retval #OP_EBADLINK We failed to find data we had seen before, or the
+                         bitstream structure was sufficiently malformed that
+                         seeking to the target destination was impossible.*/
+int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+/**\defgroup stream_decoding Decoding*/
+/*@{*/
+/**\name Functions for decoding audio data
+
+   These functions retrieve actual decoded audio data from the stream.
+   The general functions, op_read() and op_read_float() return 16-bit or
+    floating-point output, both using native endian ordering.
+   The number of channels returned can change from link to link in a chained
+    stream.
+   There are special functions, op_read_stereo() and op_read_float_stereo(),
+    which always output two channels, to simplify applications which do not
+    wish to handle multichannel audio.
+   These downmix multichannel files to two channels, so they can always return
+    samples in the same format for every link in a chained file.
+
+   If the rest of your audio processing chain can handle floating point, those
+    routines should be preferred, as floating point output avoids introducing
+    clipping and other issues which might be avoided entirely if, e.g., you
+    scale down the volume at some other stage.
+   However, if you intend to direct consume 16-bit samples, the conversion in
+    <tt>libopusfile</tt> provides noise-shaping dithering and, if compiled
+    against <tt>libopus</tt>&nbsp;1.1 or later, soft-clipping prevention.
+
+   <tt>libopusfile</tt> can also be configured at compile time to use the
+    fixed-point <tt>libopus</tt> API.
+   If so, <tt>libopusfile</tt>'s floating-point API may also be disabled.
+   In that configuration, nothing in <tt>libopusfile</tt> will use any
+    floating-point operations, to simplify support on devices without an
+    adequate FPU.
+
+   \warning HTTPS streams may be be vulnerable to truncation attacks if you do
+    not check the error return code from op_read_float() or its associated
+    functions.
+   If the remote peer does not close the connection gracefully (with a TLS
+    "close notify" message), these functions will return #OP_EREAD instead of 0
+    when they reach the end of the file.
+   If you are reading from an <https:> URL (particularly if seeking is not
+    supported), you should make sure to check for this error and warn the user
+    appropriately.*/
+/*@{*/
+
+/**Indicates that the decoding callback should produce signed 16-bit
+    native-endian output samples.*/
+#define OP_DEC_FORMAT_SHORT (7008)
+/**Indicates that the decoding callback should produce 32-bit native-endian
+    float samples.*/
+#define OP_DEC_FORMAT_FLOAT (7040)
+
+/**Indicates that the decoding callback did not decode anything, and that
+    <tt>libopusfile</tt> should decode normally instead.*/
+#define OP_DEC_USE_DEFAULT  (6720)
+
+/**Called to decode an Opus packet.
+   This should invoke the functional equivalent of opus_multistream_decode() or
+    opus_multistream_decode_float(), except that it returns 0 on success
+    instead of the number of decoded samples (which is known a priori).
+   \param _ctx       The application-provided callback context.
+   \param _decoder   The decoder to use to decode the packet.
+   \param[out] _pcm  The buffer to decode into.
+                     This will always have enough room for \a _nchannels of
+                      \a _nsamples samples, which should be placed into this
+                      buffer interleaved.
+   \param _op        The packet to decode.
+                     This will always have its granule position set to a valid
+                      value.
+   \param _nsamples  The number of samples expected from the packet.
+   \param _nchannels The number of channels expected from the packet.
+   \param _format    The desired sample output format.
+                     This is either #OP_DEC_FORMAT_SHORT or
+                      #OP_DEC_FORMAT_FLOAT.
+   \param _li        The index of the link from which this packet was decoded.
+   \return A non-negative value on success, or a negative value on error.
+           The error codes should be the same as those returned by
+            opus_multistream_decode() or opus_multistream_decode_float().
+   \retval 0                   Decoding was successful.
+                               The application has filled the buffer with
+                                exactly <code>\a _nsamples*\a
+                                _nchannels</code> samples in the requested
+                                format.
+   \retval #OP_DEC_USE_DEFAULT No decoding was done.
+                               <tt>libopusfile</tt> should decode normally
+                                instead.*/
+typedef int (*op_decode_cb_func)(void *_ctx,OpusMSDecoder *_decoder,void *_pcm,
+ const ogg_packet *_op,int _nsamples,int _nchannels,int _format,int _li);
+
+/**Sets the packet decode callback function.
+   This is called once for each packet that needs to be decoded.
+   A call to this function is no guarantee that the audio will eventually be
+    delivered to the application.
+   Some or all of the data from the packet may be discarded (i.e., at the
+    beginning or end of a link, or after a seek), however the callback is
+    required to provide all of it.
+   \param _of        The \c OggOpusFile on which to set the decode callback.
+   \param _decode_cb The callback function to call.
+                     This may be <code>NULL</code> to disable calling the
+                      callback.
+   \param _ctx       The application-provided context pointer to pass to the
+                      callback on each call.*/
+void op_set_decode_callback(OggOpusFile *_of,
+ op_decode_cb_func _decode_cb,void *_ctx) OP_ARG_NONNULL(1);
+
+/**Gain offset type that indicates that the provided offset is relative to the
+    header gain.
+   This is the default.*/
+#define OP_HEADER_GAIN   (0)
+
+/**Gain offset type that indicates that the provided offset is relative to the
+    R128_TRACK_GAIN value (if any), in addition to the header gain.*/
+#define OP_TRACK_GAIN    (3008)
+
+/**Gain offset type that indicates that the provided offset should be used as
+    the gain directly, without applying any the header or track gains.*/
+#define OP_ABSOLUTE_GAIN (3009)
+
+/**Sets the gain to be used for decoded output.
+   By default, the gain in the header is applied with no additional offset.
+   The total gain (including header gain and/or track gain, if applicable, and
+    this offset), will be clamped to [-32768,32767]/256 dB.
+   This is more than enough to saturate or underflow 16-bit PCM.
+   \note The new gain will not be applied to any already buffered, decoded
+    output.
+   This means you cannot change it sample-by-sample, as at best it will be
+    updated packet-by-packet.
+   It is meant for setting a target volume level, rather than applying smooth
+    fades, etc.
+   \param _of             The \c OggOpusFile on which to set the gain offset.
+   \param _gain_type      One of #OP_HEADER_GAIN, #OP_TRACK_GAIN, or
+                           #OP_ABSOLUTE_GAIN.
+   \param _gain_offset_q8 The gain offset to apply, in 1/256ths of a dB.
+   \return 0 on success or a negative value on error.
+   \retval #OP_EINVAL The \a _gain_type was unrecognized.*/
+int op_set_gain_offset(OggOpusFile *_of,
+ int _gain_type,opus_int32 _gain_offset_q8) OP_ARG_NONNULL(1);
+
+/**Sets whether or not dithering is enabled for 16-bit decoding.
+   By default, when <tt>libopusfile</tt> is compiled to use floating-point
+    internally, calling op_read() or op_read_stereo() will first decode to
+    float, and then convert to fixed-point using noise-shaping dithering.
+   This flag can be used to disable that dithering.
+   When the application uses op_read_float() or op_read_float_stereo(), or when
+    the library has been compiled to decode directly to fixed point, this flag
+    has no effect.
+   \param _of      The \c OggOpusFile on which to enable or disable dithering.
+   \param _enabled A non-zero value to enable dithering, or 0 to disable it.*/
+void op_set_dither_enabled(OggOpusFile *_of,int _enabled) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream.
+   \note Although \a _buf_size must indicate the total number of values that
+    can be stored in \a _pcm, the return value is the number of samples
+    <em>per channel</em>.
+   This is done because
+   <ol>
+   <li>The channel count cannot be known a priori (reading more samples might
+        advance us into the next link, with a different channel count), so
+        \a _buf_size cannot also be in units of samples per channel,</li>
+   <li>Returning the samples per channel matches the <code>libopus</code> API
+        as closely as we're able,</li>
+   <li>Returning the total number of values instead of samples per channel
+        would mean the caller would need a division to compute the samples per
+        channel, and might worry about the possibility of getting back samples
+        for some channels and not others, and</li>
+   <li>This approach is relatively fool-proof: if an application passes too
+        small a value to \a _buf_size, they will simply get fewer samples back,
+        and if they assume the return value is the total number of values, then
+        they will simply read too few (rather than reading too many and going
+        off the end of the buffer).</li>
+   </ol>
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples, as
+                          signed native-endian 16-bit values at 48&nbsp;kHz
+                          with a nominal range of <code>[-32768,32767)</code>.
+                         Multiple channels are interleaved using the
+                          <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+                          channel ordering</a>.
+                         This must have room for at least \a _buf_size values.
+   \param      _buf_size The number of values that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (5760
+                          values per channel).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         <tt>libopusfile</tt> may return less data than
+                          requested.
+                         If so, there is no guarantee that the remaining data
+                          in \a _pcm will be unmodified.
+   \param[out] _li       The index of the link this data was decoded from.
+                         You may pass <code>NULL</code> if you do not need this
+                          information.
+                         If this function fails (returning a negative value),
+                          this parameter is left unset.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The channel count can be retrieved on success by calling
+            <code>op_head(_of,*_li)</code>.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for all channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read(OggOpusFile *_of,
+ opus_int16 *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream.
+   \note Although \a _buf_size must indicate the total number of values that
+    can be stored in \a _pcm, the return value is the number of samples
+    <em>per channel</em>.
+   <ol>
+   <li>The channel count cannot be known a priori (reading more samples might
+        advance us into the next link, with a different channel count), so
+        \a _buf_size cannot also be in units of samples per channel,</li>
+   <li>Returning the samples per channel matches the <code>libopus</code> API
+        as closely as we're able,</li>
+   <li>Returning the total number of values instead of samples per channel
+        would mean the caller would need a division to compute the samples per
+        channel, and might worry about the possibility of getting back samples
+        for some channels and not others, and</li>
+   <li>This approach is relatively fool-proof: if an application passes too
+        small a value to \a _buf_size, they will simply get fewer samples back,
+        and if they assume the return value is the total number of values, then
+        they will simply read too few (rather than reading too many and going
+        off the end of the buffer).</li>
+   </ol>
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples as
+                          signed floats at 48&nbsp;kHz with a nominal range of
+                          <code>[-1.0,1.0]</code>.
+                         Multiple channels are interleaved using the
+                          <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis
+                          channel ordering</a>.
+                         This must have room for at least \a _buf_size floats.
+   \param      _buf_size The number of floats that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (5760
+                          samples per channel).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         If less than \a _buf_size values are returned,
+                          <tt>libopusfile</tt> makes no guarantee that the
+                          remaining data in \a _pcm will be unmodified.
+   \param[out] _li       The index of the link this data was decoded from.
+                         You may pass <code>NULL</code> if you do not need this
+                          information.
+                         If this function fails (returning a negative value),
+                          this parameter is left unset.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The channel count can be retrieved on success by calling
+            <code>op_head(_of,*_li)</code>.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for all channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read_float(OggOpusFile *_of,
+ float *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream and downmixes to stereo, if necessary.
+   This function is intended for simple players that want a uniform output
+    format, even if the channel count changes between links in a chained
+    stream.
+   \note \a _buf_size indicates the total number of values that can be stored
+    in \a _pcm, while the return value is the number of samples <em>per
+    channel</em>, even though the channel count is known, for consistency with
+    op_read().
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples, as
+                          signed native-endian 16-bit values at 48&nbsp;kHz
+                          with a nominal range of <code>[-32768,32767)</code>.
+                         The left and right channels are interleaved in the
+                          buffer.
+                         This must have room for at least \a _buf_size values.
+   \param      _buf_size The number of values that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (11520
+                          values total).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         If less than \a _buf_size values are returned,
+                          <tt>libopusfile</tt> makes no guarantee that the
+                          remaining data in \a _pcm will be unmodified.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for both channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read_stereo(OggOpusFile *_of,
+ opus_int16 *_pcm,int _buf_size) OP_ARG_NONNULL(1);
+
+/**Reads more samples from the stream and downmixes to stereo, if necessary.
+   This function is intended for simple players that want a uniform output
+    format, even if the channel count changes between links in a chained
+    stream.
+   \note \a _buf_size indicates the total number of values that can be stored
+    in \a _pcm, while the return value is the number of samples <em>per
+    channel</em>, even though the channel count is known, for consistency with
+    op_read_float().
+   \param      _of       The \c OggOpusFile from which to read.
+   \param[out] _pcm      A buffer in which to store the output PCM samples, as
+                          signed floats at 48&nbsp;kHz with a nominal range of
+                          <code>[-1.0,1.0]</code>.
+                         The left and right channels are interleaved in the
+                          buffer.
+                         This must have room for at least \a _buf_size values.
+   \param      _buf_size The number of values that can be stored in \a _pcm.
+                         It is recommended that this be large enough for at
+                          least 120 ms of data at 48 kHz per channel (11520
+                          values total).
+                         Smaller buffers will simply return less data, possibly
+                          consuming more memory to buffer the data internally.
+                         If less than \a _buf_size values are returned,
+                          <tt>libopusfile</tt> makes no guarantee that the
+                          remaining data in \a _pcm will be unmodified.
+   \return The number of samples read per channel on success, or a negative
+            value on failure.
+           The number of samples returned may be 0 if the buffer was too small
+            to store even a single sample for both channels, or if end-of-file
+            was reached.
+           The list of possible failure codes follows.
+           Most of them can only be returned by unseekable, chained streams
+            that encounter a new link.
+   \retval #OP_HOLE          There was a hole in the data, and some samples
+                              may have been skipped.
+                             Call this function again to continue decoding
+                              past the hole.
+   \retval #OP_EREAD         An underlying read operation failed.
+                             This may signal a truncation attack from an
+                              <https:> source.
+   \retval #OP_EFAULT        An internal memory allocation failed.
+   \retval #OP_EIMPL         An unseekable stream encountered a new link that
+                              used a feature that is not implemented, such as
+                              an unsupported channel family.
+   \retval #OP_EINVAL        The stream was only partially open.
+   \retval #OP_ENOTFORMAT    An unseekable stream encountered a new link that
+                              that did not have any logical Opus streams in it.
+   \retval #OP_EBADHEADER    An unseekable stream encountered a new link with a
+                              required header packet that was not properly
+                              formatted, contained illegal values, or was
+                              missing altogether.
+   \retval #OP_EVERSION      An unseekable stream encountered a new link with
+                              an ID header that contained an unrecognized
+                              version number.
+   \retval #OP_EBADPACKET    Failed to properly decode the next packet.
+   \retval #OP_EBADLINK      We failed to find data we had seen before.
+   \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with
+                              a starting timestamp that failed basic validity
+                              checks.*/
+OP_WARN_UNUSED_RESULT int op_read_float_stereo(OggOpusFile *_of,
+ float *_pcm,int _buf_size) OP_ARG_NONNULL(1);
+
+/*@}*/
+/*@}*/
+
+# if OP_GNUC_PREREQ(4,0)
+#  pragma GCC visibility pop
+# endif
+
+# if defined(__cplusplus)
+}
+# endif
+
+#endif
diff --git a/drivers/opus/repacketizer.c b/drivers/opus/repacketizer.c
new file mode 100644
index 0000000000..01406bb39b
--- /dev/null
+++ b/drivers/opus/repacketizer.c
@@ -0,0 +1,345 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus.h"
+#include "opus_private.h"
+#include "os_support.h"
+
+
+int opus_repacketizer_get_size(void)
+{
+   return sizeof(OpusRepacketizer);
+}
+
+OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp)
+{
+   rp->nb_frames = 0;
+   return rp;
+}
+
+OpusRepacketizer *opus_repacketizer_create(void)
+{
+   OpusRepacketizer *rp;
+   rp=(OpusRepacketizer *)opus_alloc(opus_repacketizer_get_size());
+   if(rp==NULL)return NULL;
+   return opus_repacketizer_init(rp);
+}
+
+void opus_repacketizer_destroy(OpusRepacketizer *rp)
+{
+   opus_free(rp);
+}
+
+static int opus_repacketizer_cat_impl(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len, int self_delimited)
+{
+   unsigned char tmp_toc;
+   int curr_nb_frames,ret;
+   /* Set of check ToC */
+   if (len<1) return OPUS_INVALID_PACKET;
+   if (rp->nb_frames == 0)
+   {
+      rp->toc = data[0];
+      rp->framesize = opus_packet_get_samples_per_frame(data, 8000);
+   } else if ((rp->toc&0xFC) != (data[0]&0xFC))
+   {
+      /*fprintf(stderr, "toc mismatch: 0x%x vs 0x%x\n", rp->toc, data[0]);*/
+      return OPUS_INVALID_PACKET;
+   }
+   curr_nb_frames = opus_packet_get_nb_frames(data, len);
+   if(curr_nb_frames<1) return OPUS_INVALID_PACKET;
+
+   /* Check the 120 ms maximum packet size */
+   if ((curr_nb_frames+rp->nb_frames)*rp->framesize > 960)
+   {
+      return OPUS_INVALID_PACKET;
+   }
+
+   ret=opus_packet_parse_impl(data, len, self_delimited, &tmp_toc, &rp->frames[rp->nb_frames], &rp->len[rp->nb_frames], NULL, NULL);
+   if(ret<1)return ret;
+
+   rp->nb_frames += curr_nb_frames;
+   return OPUS_OK;
+}
+
+int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len)
+{
+   return opus_repacketizer_cat_impl(rp, data, len, 0);
+}
+
+int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp)
+{
+   return rp->nb_frames;
+}
+
+opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
+      unsigned char *data, opus_int32 maxlen, int self_delimited, int pad)
+{
+   int i, count;
+   opus_int32 tot_size;
+   opus_int16 *len;
+   const unsigned char **frames;
+   unsigned char * ptr;
+
+   if (begin<0 || begin>=end || end>rp->nb_frames)
+   {
+      /*fprintf(stderr, "%d %d %d\n", begin, end, rp->nb_frames);*/
+      return OPUS_BAD_ARG;
+   }
+   count = end-begin;
+
+   len = rp->len+begin;
+   frames = rp->frames+begin;
+   if (self_delimited)
+      tot_size = 1 + (len[count-1]>=252);
+   else
+      tot_size = 0;
+
+   ptr = data;
+   if (count==1)
+   {
+      /* Code 0 */
+      tot_size += len[0]+1;
+      if (tot_size > maxlen)
+         return OPUS_BUFFER_TOO_SMALL;
+      *ptr++ = rp->toc&0xFC;
+   } else if (count==2)
+   {
+      if (len[1] == len[0])
+      {
+         /* Code 1 */
+         tot_size += 2*len[0]+1;
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x1;
+      } else {
+         /* Code 2 */
+         tot_size += len[0]+len[1]+2+(len[0]>=252);
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x2;
+         ptr += encode_size(len[0], ptr);
+      }
+   }
+   if (count > 2 || (pad && tot_size < maxlen))
+   {
+      /* Code 3 */
+      int vbr;
+      int pad_amount=0;
+
+      /* Restart the process for the padding case */
+      ptr = data;
+      if (self_delimited)
+         tot_size = 1 + (len[count-1]>=252);
+      else
+         tot_size = 0;
+      vbr = 0;
+      for (i=1;i<count;i++)
+      {
+         if (len[i] != len[0])
+         {
+            vbr=1;
+            break;
+         }
+      }
+      if (vbr)
+      {
+         tot_size += 2;
+         for (i=0;i<count-1;i++)
+            tot_size += 1 + (len[i]>=252) + len[i];
+         tot_size += len[count-1];
+
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x3;
+         *ptr++ = count | 0x80;
+      } else {
+         tot_size += count*len[0]+2;
+         if (tot_size > maxlen)
+            return OPUS_BUFFER_TOO_SMALL;
+         *ptr++ = (rp->toc&0xFC) | 0x3;
+         *ptr++ = count;
+      }
+      pad_amount = pad ? (maxlen-tot_size) : 0;
+      if (pad_amount != 0)
+      {
+         int nb_255s;
+         data[1] |= 0x40;
+         nb_255s = (pad_amount-1)/255;
+         for (i=0;i<nb_255s;i++)
+            *ptr++ = 255;
+         *ptr++ = pad_amount-255*nb_255s-1;
+         tot_size += pad_amount;
+      }
+      if (vbr)
+      {
+         for (i=0;i<count-1;i++)
+            ptr += encode_size(len[i], ptr);
+      }
+   }
+   if (self_delimited) {
+      int sdlen = encode_size(len[count-1], ptr);
+      ptr += sdlen;
+   }
+   /* Copy the actual data */
+   for (i=0;i<count;i++)
+   {
+      /* Using OPUS_MOVE() instead of OPUS_COPY() in case we're doing in-place
+         padding from opus_packet_pad or opus_packet_unpad(). */
+      celt_assert(frames[i] + len[i] <= data || ptr <= frames[i]);
+      OPUS_MOVE(ptr, frames[i], len[i]);
+      ptr += len[i];
+   }
+   if (pad)
+   {
+      for (i=ptr-data;i<maxlen;i++)
+         data[i] = 0;
+   }
+   return tot_size;
+}
+
+opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen)
+{
+   return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0);
+}
+
+opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen)
+{
+   return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0);
+}
+
+int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len)
+{
+   OpusRepacketizer rp;
+   opus_int32 ret;
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   if (len==new_len)
+      return OPUS_OK;
+   else if (len > new_len)
+      return OPUS_BAD_ARG;
+   opus_repacketizer_init(&rp);
+   /* Moving payload to the end of the packet so we can do in-place padding */
+   OPUS_MOVE(data+new_len-len, data, len);
+   opus_repacketizer_cat(&rp, data+new_len-len, len);
+   ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1);
+   if (ret > 0)
+      return OPUS_OK;
+   else
+      return ret;
+}
+
+opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len)
+{
+   OpusRepacketizer rp;
+   opus_int32 ret;
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   opus_repacketizer_init(&rp);
+   ret = opus_repacketizer_cat(&rp, data, len);
+   if (ret < 0)
+      return ret;
+   ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, len, 0, 0);
+   celt_assert(ret > 0 && ret <= len);
+   return ret;
+}
+
+int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams)
+{
+   int s;
+   int count;
+   unsigned char toc;
+   opus_int16 size[48];
+   opus_int32 packet_offset;
+   opus_int32 amount;
+
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   if (len==new_len)
+      return OPUS_OK;
+   else if (len > new_len)
+      return OPUS_BAD_ARG;
+   amount = new_len - len;
+   /* Seek to last stream */
+   for (s=0;s<nb_streams-1;s++)
+   {
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      count = opus_packet_parse_impl(data, len, 1, &toc, NULL,
+                                     size, NULL, &packet_offset);
+      if (count<0)
+         return count;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return opus_packet_pad(data, len, len+amount);
+}
+
+opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams)
+{
+   int s;
+   unsigned char toc;
+   opus_int16 size[48];
+   opus_int32 packet_offset;
+   OpusRepacketizer rp;
+   unsigned char *dst;
+   opus_int32 dst_len;
+
+   if (len < 1)
+      return OPUS_BAD_ARG;
+   dst = data;
+   dst_len = 0;
+   /* Unpad all frames */
+   for (s=0;s<nb_streams;s++)
+   {
+      opus_int32 ret;
+      int self_delimited = s!=nb_streams-1;
+      if (len<=0)
+         return OPUS_INVALID_PACKET;
+      opus_repacketizer_init(&rp);
+      ret = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL,
+                                     size, NULL, &packet_offset);
+      if (ret<0)
+         return ret;
+      ret = opus_repacketizer_cat_impl(&rp, data, packet_offset, self_delimited);
+      if (ret < 0)
+         return ret;
+      ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, dst, len, self_delimited, 0);
+      if (ret < 0)
+         return ret;
+      else
+         dst_len += ret;
+      dst += ret;
+      data += packet_offset;
+      len -= packet_offset;
+   }
+   return dst_len;
+}
+
diff --git a/drivers/opus/repacketizer_demo.c b/drivers/opus/repacketizer_demo.c
new file mode 100644
index 0000000000..1ca9cc3c96
--- /dev/null
+++ b/drivers/opus/repacketizer_demo.c
@@ -0,0 +1,217 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "opus.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_PACKETOUT 32000
+
+void usage(char *argv0)
+{
+   fprintf(stderr, "usage: %s [options] input_file output_file\n", argv0);
+}
+
+static void int_to_char(opus_uint32 i, unsigned char ch[4])
+{
+    ch[0] = i>>24;
+    ch[1] = (i>>16)&0xFF;
+    ch[2] = (i>>8)&0xFF;
+    ch[3] = i&0xFF;
+}
+
+static opus_uint32 char_to_int(unsigned char ch[4])
+{
+    return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16)
+         | ((opus_uint32)ch[2]<< 8) |  (opus_uint32)ch[3];
+}
+
+int main(int argc, char *argv[])
+{
+   int i, eof=0;
+   FILE *fin, *fout;
+   unsigned char packets[48][1500];
+   int len[48];
+   int rng[48];
+   OpusRepacketizer *rp;
+   unsigned char output_packet[MAX_PACKETOUT];
+   int merge = 1, split=0;
+
+   if (argc < 3)
+   {
+      usage(argv[0]);
+      return EXIT_FAILURE;
+   }
+   for (i=1;i<argc-2;i++)
+   {
+      if (strcmp(argv[i], "-merge")==0)
+      {
+         merge = atoi(argv[i+1]);
+         if(merge<1)
+         {
+            fprintf(stderr, "-merge parameter must be at least 1.\n");
+            return EXIT_FAILURE;
+         }
+         if(merge>48)
+         {
+            fprintf(stderr, "-merge parameter must be less than 48.\n");
+            return EXIT_FAILURE;
+         }
+         i++;
+      } else if (strcmp(argv[i], "-split")==0)
+         split = 1;
+      else
+      {
+         fprintf(stderr, "Unknown option: %s\n", argv[i]);
+         usage(argv[0]);
+         return EXIT_FAILURE;
+      }
+   }
+   fin = fopen(argv[argc-2], "r");
+   if(fin==NULL)
+   {
+     fprintf(stderr, "Error opening input file: %s\n", argv[argc-2]);
+     return EXIT_FAILURE;
+   }
+   fout = fopen(argv[argc-1], "w");
+   if(fout==NULL)
+   {
+     fprintf(stderr, "Error opening output file: %s\n", argv[argc-1]);
+     fclose(fin);
+     return EXIT_FAILURE;
+   }
+
+   rp = opus_repacketizer_create();
+   while (!eof)
+   {
+      int err;
+      int nb_packets=merge;
+      opus_repacketizer_init(rp);
+      for (i=0;i<nb_packets;i++)
+      {
+         unsigned char ch[4];
+         err = fread(ch, 1, 4, fin);
+         len[i] = char_to_int(ch);
+         /*fprintf(stderr, "in len = %d\n", len[i]);*/
+         if (len[i]>1500 || len[i]<0)
+         {
+             if (feof(fin))
+             {
+                eof = 1;
+             } else {
+                fprintf(stderr, "Invalid payload length\n");
+                fclose(fin);
+                fclose(fout);
+                return EXIT_FAILURE;
+             }
+             break;
+         }
+         err = fread(ch, 1, 4, fin);
+         rng[i] = char_to_int(ch);
+         err = fread(packets[i], 1, len[i], fin);
+         if (feof(fin))
+         {
+            eof = 1;
+            break;
+         }
+         err = opus_repacketizer_cat(rp, packets[i], len[i]);
+         if (err!=OPUS_OK)
+         {
+            fprintf(stderr, "opus_repacketizer_cat() failed: %s\n", opus_strerror(err));
+            break;
+         }
+      }
+      nb_packets = i;
+
+      if (eof)
+         break;
+
+      if (!split)
+      {
+         err = opus_repacketizer_out(rp, output_packet, MAX_PACKETOUT);
+         if (err>0) {
+            unsigned char int_field[4];
+            int_to_char(err, int_field);
+            if(fwrite(int_field, 1, 4, fout)!=4){
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            int_to_char(rng[nb_packets-1], int_field);
+            if (fwrite(int_field, 1, 4, fout)!=4) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) {
+               fprintf(stderr, "Error writing.\n");
+               return EXIT_FAILURE;
+            }
+            /*fprintf(stderr, "out len = %d\n", err);*/
+         } else {
+            fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err));
+         }
+      } else {
+         int nb_frames = opus_repacketizer_get_nb_frames(rp);
+         for (i=0;i<nb_frames;i++)
+         {
+            err = opus_repacketizer_out_range(rp, i, i+1, output_packet, MAX_PACKETOUT);
+            if (err>0) {
+               unsigned char int_field[4];
+               int_to_char(err, int_field);
+               if (fwrite(int_field, 1, 4, fout)!=4) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               if (i==nb_frames-1)
+                  int_to_char(rng[nb_packets-1], int_field);
+               else
+                  int_to_char(0, int_field);
+               if (fwrite(int_field, 1, 4, fout)!=4) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) {
+                  fprintf(stderr, "Error writing.\n");
+                  return EXIT_FAILURE;
+               }
+               /*fprintf(stderr, "out len = %d\n", err);*/
+            } else {
+               fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err));
+            }
+
+         }
+      }
+   }
+
+   fclose(fin);
+   fclose(fout);
+   return EXIT_SUCCESS;
+}
diff --git a/drivers/opus/silk/A2NLSF.c b/drivers/opus/silk/A2NLSF.c
new file mode 100644
index 0000000000..cec53a5cd8
--- /dev/null
+++ b/drivers/opus/silk/A2NLSF.c
@@ -0,0 +1,252 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/* Conversion between prediction filter coefficients and NLSFs  */
+/* Requires the order to be an even number                      */
+/* A piecewise linear approximation maps LSF <-> cos(LSF)       */
+/* Therefore the result is not accurate NLSFs, but the two      */
+/* functions are accurate inverses of each other                */
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "tables.h"
+
+/* Number of binary divisions, when not in low complexity mode */
+#define BIN_DIV_STEPS_A2NLSF_FIX      3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
+#define MAX_ITERATIONS_A2NLSF_FIX    30
+
+/* Helper function for A2NLSF(..)                    */
+/* Transforms polynomials from cos(n*f) to cos(f)^n  */
+static OPUS_INLINE void silk_A2NLSF_trans_poly(
+    opus_int32          *p,                     /* I/O    Polynomial                                */
+    const opus_int      dd                      /* I      Polynomial order (= filter order / 2 )    */
+)
+{
+    opus_int k, n;
+
+    for( k = 2; k <= dd; k++ ) {
+        for( n = dd; n > k; n-- ) {
+            p[ n - 2 ] -= p[ n ];
+        }
+        p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 );
+    }
+}
+/* Helper function for A2NLSF(..) */
+/* Polynomial evaluation          */
+static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16     */
+    opus_int32          *p,                     /* I    Polynomial, Q16                         */
+    const opus_int32    x,                      /* I    Evaluation point, Q12                   */
+    const opus_int      dd                      /* I    Order                                   */
+)
+{
+    opus_int   n;
+    opus_int32 x_Q16, y32;
+
+    y32 = p[ dd ];                                  /* Q16 */
+    x_Q16 = silk_LSHIFT( x, 4 );
+    for( n = dd - 1; n >= 0; n-- ) {
+        y32 = silk_SMLAWW( p[ n ], y32, x_Q16 );    /* Q16 */
+    }
+    return y32;
+}
+
+static OPUS_INLINE void silk_A2NLSF_init(
+     const opus_int32    *a_Q16,
+     opus_int32          *P,
+     opus_int32          *Q,
+     const opus_int      dd
+)
+{
+    opus_int k;
+
+    /* Convert filter coefs to even and odd polynomials */
+    P[dd] = silk_LSHIFT( 1, 16 );
+    Q[dd] = silk_LSHIFT( 1, 16 );
+    for( k = 0; k < dd; k++ ) {
+        P[ k ] = -a_Q16[ dd - k - 1 ] - a_Q16[ dd + k ];    /* Q16 */
+        Q[ k ] = -a_Q16[ dd - k - 1 ] + a_Q16[ dd + k ];    /* Q16 */
+    }
+
+    /* Divide out zeros as we have that for even filter orders, */
+    /* z =  1 is always a root in Q, and                        */
+    /* z = -1 is always a root in P                             */
+    for( k = dd; k > 0; k-- ) {
+        P[ k - 1 ] -= P[ k ];
+        Q[ k - 1 ] += Q[ k ];
+    }
+
+    /* Transform polynomials from cos(n*f) to cos(f)^n */
+    silk_A2NLSF_trans_poly( P, dd );
+    silk_A2NLSF_trans_poly( Q, dd );
+}
+
+/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
+/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
+void silk_A2NLSF(
+    opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
+    opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
+    const opus_int              d                   /* I    Filter order (must be even)                                 */
+)
+{
+    opus_int      i, k, m, dd, root_ix, ffrac;
+    opus_int32 xlo, xhi, xmid;
+    opus_int32 ylo, yhi, ymid, thr;
+    opus_int32 nom, den;
+    opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+    opus_int32 Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+    opus_int32 *PQ[ 2 ];
+    opus_int32 *p;
+
+    /* Store pointers to array */
+    PQ[ 0 ] = P;
+    PQ[ 1 ] = Q;
+
+    dd = silk_RSHIFT( d, 1 );
+
+    silk_A2NLSF_init( a_Q16, P, Q, dd );
+
+    /* Find roots, alternating between P and Q */
+    p = P;                          /* Pointer to polynomial */
+
+    xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
+    ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+
+    if( ylo < 0 ) {
+        /* Set the first NLSF to zero and move on to the next */
+        NLSF[ 0 ] = 0;
+        p = Q;                      /* Pointer to polynomial */
+        ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+        root_ix = 1;                /* Index of current root */
+    } else {
+        root_ix = 0;                /* Index of current root */
+    }
+    k = 1;                          /* Loop counter */
+    i = 0;                          /* Counter for bandwidth expansions applied */
+    thr = 0;
+    while( 1 ) {
+        /* Evaluate polynomial */
+        xhi = silk_LSFCosTab_FIX_Q12[ k ]; /* Q12 */
+        yhi = silk_A2NLSF_eval_poly( p, xhi, dd );
+
+        /* Detect zero crossing */
+        if( ( ylo <= 0 && yhi >= thr ) || ( ylo >= 0 && yhi <= -thr ) ) {
+            if( yhi == 0 ) {
+                /* If the root lies exactly at the end of the current       */
+                /* interval, look for the next root in the next interval    */
+                thr = 1;
+            } else {
+                thr = 0;
+            }
+            /* Binary division */
+            ffrac = -256;
+            for( m = 0; m < BIN_DIV_STEPS_A2NLSF_FIX; m++ ) {
+                /* Evaluate polynomial */
+                xmid = silk_RSHIFT_ROUND( xlo + xhi, 1 );
+                ymid = silk_A2NLSF_eval_poly( p, xmid, dd );
+
+                /* Detect zero crossing */
+                if( ( ylo <= 0 && ymid >= 0 ) || ( ylo >= 0 && ymid <= 0 ) ) {
+                    /* Reduce frequency */
+                    xhi = xmid;
+                    yhi = ymid;
+                } else {
+                    /* Increase frequency */
+                    xlo = xmid;
+                    ylo = ymid;
+                    ffrac = silk_ADD_RSHIFT( ffrac, 128, m );
+                }
+            }
+
+            /* Interpolate */
+            if( silk_abs( ylo ) < 65536 ) {
+                /* Avoid dividing by zero */
+                den = ylo - yhi;
+                nom = silk_LSHIFT( ylo, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) + silk_RSHIFT( den, 1 );
+                if( den != 0 ) {
+                    ffrac += silk_DIV32( nom, den );
+                }
+            } else {
+                /* No risk of dividing by zero because abs(ylo - yhi) >= abs(ylo) >= 65536 */
+                ffrac += silk_DIV32( ylo, silk_RSHIFT( ylo - yhi, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) );
+            }
+            NLSF[ root_ix ] = (opus_int16)silk_min_32( silk_LSHIFT( (opus_int32)k, 8 ) + ffrac, silk_int16_MAX );
+
+            silk_assert( NLSF[ root_ix ] >= 0 );
+
+            root_ix++;        /* Next root */
+            if( root_ix >= d ) {
+                /* Found all roots */
+                break;
+            }
+            /* Alternate pointer to polynomial */
+            p = PQ[ root_ix & 1 ];
+
+            /* Evaluate polynomial */
+            xlo = silk_LSFCosTab_FIX_Q12[ k - 1 ]; /* Q12*/
+            ylo = silk_LSHIFT( 1 - ( root_ix & 2 ), 12 );
+        } else {
+            /* Increment loop counter */
+            k++;
+            xlo = xhi;
+            ylo = yhi;
+            thr = 0;
+
+            if( k > LSF_COS_TAB_SZ_FIX ) {
+                i++;
+                if( i > MAX_ITERATIONS_A2NLSF_FIX ) {
+                    /* Set NLSFs to white spectrum and exit */
+                    NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 );
+                    for( k = 1; k < d; k++ ) {
+                        NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] );
+                    }
+                    return;
+                }
+
+                /* Error: Apply progressively more bandwidth expansion and run again */
+                silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/
+
+                silk_A2NLSF_init( a_Q16, P, Q, dd );
+                p = P;                            /* Pointer to polynomial */
+                xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
+                ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+                if( ylo < 0 ) {
+                    /* Set the first NLSF to zero and move on to the next */
+                    NLSF[ 0 ] = 0;
+                    p = Q;                        /* Pointer to polynomial */
+                    ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
+                    root_ix = 1;                  /* Index of current root */
+                } else {
+                    root_ix = 0;                  /* Index of current root */
+                }
+                k = 1;                            /* Reset loop counter */
+            }
+        }
+    }
+}
diff --git a/drivers/opus/silk/API.h b/drivers/opus/silk/API.h
new file mode 100644
index 0000000000..f0601bcf6b
--- /dev/null
+++ b/drivers/opus/silk/API.h
@@ -0,0 +1,133 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_API_H
+#define SILK_API_H
+
+#include "control.h"
+#include "typedef.h"
+#include "errors.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define SILK_MAX_FRAMES_PER_PACKET  3
+
+/* Struct for TOC (Table of Contents) */
+typedef struct {
+    opus_int    VADFlag;                                /* Voice activity for packet                            */
+    opus_int    VADFlags[ SILK_MAX_FRAMES_PER_PACKET ]; /* Voice activity for each frame in packet              */
+    opus_int    inbandFECFlag;                          /* Flag indicating if packet contains in-band FEC       */
+} silk_TOC_struct;
+
+/****************************************/
+/* Encoder functions                    */
+/****************************************/
+
+/***********************************************/
+/* Get size in bytes of the Silk encoder state */
+/***********************************************/
+opus_int silk_Get_Encoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *encSizeBytes       /* O    Number of bytes in SILK encoder state           */
+);
+
+/*************************/
+/* Init or reset encoder */
+/*************************/
+opus_int silk_InitEncoder(                              /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+);
+
+/**************************/
+/* Encode frame with Silk */
+/**************************/
+/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what                     */
+/* encControl->payloadSize_ms is set to                                                                         */
+opus_int silk_Encode(                                   /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    silk_EncControlStruct           *encControl,        /* I    Control status                                  */
+    const opus_int16                *samplesIn,         /* I    Speech sample input vector                      */
+    opus_int                        nSamplesIn,         /* I    Number of samples in input vector               */
+    ec_enc                          *psRangeEnc,        /* I/O  Compressor data structure                       */
+    opus_int32                      *nBytesOut,         /* I/O  Number of bytes in payload (input: Max bytes)   */
+    const opus_int                  prefillFlag         /* I    Flag to indicate prefilling buffers no coding   */
+);
+
+/****************************************/
+/* Decoder functions                    */
+/****************************************/
+
+/***********************************************/
+/* Get size in bytes of the Silk decoder state */
+/***********************************************/
+opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
+);
+
+/*************************/
+/* Init or Reset decoder */
+/*************************/
+opus_int silk_InitDecoder(                              /* O    Returns error code                              */
+    void                            *decState           /* I/O  State                                           */
+);
+
+/******************/
+/* Decode a frame */
+/******************/
+opus_int silk_Decode(                                   /* O    Returns error code                              */
+    void*                           decState,           /* I/O  State                                           */
+    silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
+    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
+    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
+    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
+    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
+);
+
+#if 0
+/**************************************/
+/* Get table of contents for a packet */
+/**************************************/
+opus_int silk_get_TOC(
+    const opus_uint8                *payload,           /* I    Payload data                                */
+    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
+    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
+    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
+);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/CNG.c b/drivers/opus/silk/CNG.c
new file mode 100644
index 0000000000..8b8dbf882c
--- /dev/null
+++ b/drivers/opus/silk/CNG.c
@@ -0,0 +1,172 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/* Generates excitation for CNG LPC synthesis */
+static OPUS_INLINE void silk_CNG_exc(
+    opus_int32                       residual_Q10[],     /* O    CNG residual signal Q10                     */
+    opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
+    opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
+    opus_int                         length,             /* I    Length                                      */
+    opus_int32                       *rand_seed          /* I/O  Seed to random index generator              */
+)
+{
+    opus_int32 seed;
+    opus_int   i, idx, exc_mask;
+
+    exc_mask = CNG_BUF_MASK_MAX;
+    while( exc_mask > length ) {
+        exc_mask = silk_RSHIFT( exc_mask, 1 );
+    }
+
+    seed = *rand_seed;
+    for( i = 0; i < length; i++ ) {
+        seed = silk_RAND( seed );
+        idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
+        silk_assert( idx >= 0 );
+        silk_assert( idx <= CNG_BUF_MASK_MAX );
+        residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+    }
+    *rand_seed = seed;
+}
+
+void silk_CNG_Reset(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state                               */
+)
+{
+    opus_int i, NLSF_step_Q15, NLSF_acc_Q15;
+
+    NLSF_step_Q15 = silk_DIV32_16( silk_int16_MAX, psDec->LPC_order + 1 );
+    NLSF_acc_Q15 = 0;
+    for( i = 0; i < psDec->LPC_order; i++ ) {
+        NLSF_acc_Q15 += NLSF_step_Q15;
+        psDec->sCNG.CNG_smth_NLSF_Q15[ i ] = NLSF_acc_Q15;
+    }
+    psDec->sCNG.CNG_smth_Gain_Q16 = 0;
+    psDec->sCNG.rand_seed = 3176576;
+}
+
+/* Updates CNG estimate, and applies the CNG when packet was lost   */
+void silk_CNG(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int16                  frame[],                        /* I/O  Signal                                      */
+    opus_int                    length                          /* I    Length of residual                          */
+)
+{
+    opus_int   i, subfr;
+    opus_int32 sum_Q6, max_Gain_Q16;
+    opus_int16 A_Q12[ MAX_LPC_ORDER ];
+    silk_CNG_struct *psCNG = &psDec->sCNG;
+    SAVE_STACK;
+
+    if( psDec->fs_kHz != psCNG->fs_kHz ) {
+        /* Reset state */
+        silk_CNG_Reset( psDec );
+
+        psCNG->fs_kHz = psDec->fs_kHz;
+    }
+    if( psDec->lossCnt == 0 && psDec->prevSignalType == TYPE_NO_VOICE_ACTIVITY ) {
+        /* Update CNG parameters */
+
+        /* Smoothing of LSF's  */
+        for( i = 0; i < psDec->LPC_order; i++ ) {
+            psCNG->CNG_smth_NLSF_Q15[ i ] += silk_SMULWB( (opus_int32)psDec->prevNLSF_Q15[ i ] - (opus_int32)psCNG->CNG_smth_NLSF_Q15[ i ], CNG_NLSF_SMTH_Q16 );
+        }
+        /* Find the subframe with the highest gain */
+        max_Gain_Q16 = 0;
+        subfr        = 0;
+        for( i = 0; i < psDec->nb_subfr; i++ ) {
+            if( psDecCtrl->Gains_Q16[ i ] > max_Gain_Q16 ) {
+                max_Gain_Q16 = psDecCtrl->Gains_Q16[ i ];
+                subfr        = i;
+            }
+        }
+        /* Update CNG excitation buffer with excitation from this subframe */
+        silk_memmove( &psCNG->CNG_exc_buf_Q14[ psDec->subfr_length ], psCNG->CNG_exc_buf_Q14, ( psDec->nb_subfr - 1 ) * psDec->subfr_length * sizeof( opus_int32 ) );
+        silk_memcpy(   psCNG->CNG_exc_buf_Q14, &psDec->exc_Q14[ subfr * psDec->subfr_length ], psDec->subfr_length * sizeof( opus_int32 ) );
+
+        /* Smooth gains */
+        for( i = 0; i < psDec->nb_subfr; i++ ) {
+            psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 );
+        }
+    }
+
+    /* Add CNG when packet is lost or during DTX */
+    if( psDec->lossCnt ) {
+        VARDECL( opus_int32, CNG_sig_Q10 );
+
+        ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
+
+        /* Generate CNG excitation */
+        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
+
+        /* Convert CNG NLSF to filter representation */
+        silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
+
+        /* Generate CNG signal, by synthesis filtering */
+        silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        for( i = 0; i < length; i++ ) {
+            silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+            if( psDec->LPC_order == 16 ) {
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
+                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
+            }
+
+            /* Update states */
+            CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
+
+            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
+        }
+        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+    } else {
+        silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order *  sizeof( opus_int32 ) );
+    }
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/HP_variable_cutoff.c b/drivers/opus/silk/HP_variable_cutoff.c
new file mode 100644
index 0000000000..379752bb19
--- /dev/null
+++ b/drivers/opus/silk/HP_variable_cutoff.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+#include "tuning_parameters.h"
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+    silk_encoder_state_Fxx          state_Fxx[]                         /* I/O  Encoder states                              */
+)
+{
+   opus_int   quality_Q15;
+   opus_int32 pitch_freq_Hz_Q16, pitch_freq_log_Q7, delta_freq_Q7;
+   silk_encoder_state *psEncC1 = &state_Fxx[ 0 ].sCmn;
+
+   /* Adaptive cutoff frequency: estimate low end of pitch frequency range */
+   if( psEncC1->prevSignalType == TYPE_VOICED ) {
+      /* difference, in log domain */
+      pitch_freq_Hz_Q16 = silk_DIV32_16( silk_LSHIFT( silk_MUL( psEncC1->fs_kHz, 1000 ), 16 ), psEncC1->prevLag );
+      pitch_freq_log_Q7 = silk_lin2log( pitch_freq_Hz_Q16 ) - ( 16 << 7 );
+
+      /* adjustment based on quality */
+      quality_Q15 = psEncC1->input_quality_bands_Q15[ 0 ];
+      pitch_freq_log_Q7 = silk_SMLAWB( pitch_freq_log_Q7, silk_SMULWB( silk_LSHIFT( -quality_Q15, 2 ), quality_Q15 ),
+            pitch_freq_log_Q7 - ( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ) ) );
+
+      /* delta_freq = pitch_freq_log - psEnc->variable_HP_smth1; */
+      delta_freq_Q7 = pitch_freq_log_Q7 - silk_RSHIFT( psEncC1->variable_HP_smth1_Q15, 8 );
+      if( delta_freq_Q7 < 0 ) {
+         /* less smoothing for decreasing pitch frequency, to track something close to the minimum */
+         delta_freq_Q7 = silk_MUL( delta_freq_Q7, 3 );
+      }
+
+      /* limit delta, to reduce impact of outliers in pitch estimation */
+      delta_freq_Q7 = silk_LIMIT_32( delta_freq_Q7, -SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ), SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ) );
+
+      /* update smoother */
+      psEncC1->variable_HP_smth1_Q15 = silk_SMLAWB( psEncC1->variable_HP_smth1_Q15,
+            silk_SMULBB( psEncC1->speech_activity_Q8, delta_freq_Q7 ), SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF1, 16 ) );
+
+      /* limit frequency range */
+      psEncC1->variable_HP_smth1_Q15 = silk_LIMIT_32( psEncC1->variable_HP_smth1_Q15,
+            silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ),
+            silk_LSHIFT( silk_lin2log( VARIABLE_HP_MAX_CUTOFF_HZ ), 8 ) );
+   }
+}
diff --git a/drivers/opus/silk/Inlines.h b/drivers/opus/silk/Inlines.h
new file mode 100644
index 0000000000..ec986cdfdd
--- /dev/null
+++ b/drivers/opus/silk/Inlines.h
@@ -0,0 +1,188 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/*! \file silk_Inlines.h
+ *  \brief silk_Inlines.h defines OPUS_INLINE signal processing functions.
+ */
+
+#ifndef SILK_FIX_INLINES_H
+#define SILK_FIX_INLINES_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+/* count leading zeros of opus_int64 */
+static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in )
+{
+    opus_int32 in_upper;
+
+    in_upper = (opus_int32)silk_RSHIFT64(in, 32);
+    if (in_upper == 0) {
+        /* Search in the lower 32 bits */
+        return 32 + silk_CLZ32( (opus_int32) in );
+    } else {
+        /* Search in the upper 32 bits */
+        return silk_CLZ32( in_upper );
+    }
+}
+
+/* get number of leading zeros and fractional part (the bits right after the leading one */
+static OPUS_INLINE void silk_CLZ_FRAC(
+    opus_int32 in,            /* I  input                               */
+    opus_int32 *lz,           /* O  number of leading zeros             */
+    opus_int32 *frac_Q7       /* O  the 7 bits right after the leading one */
+)
+{
+    opus_int32 lzeros = silk_CLZ32(in);
+
+    * lz = lzeros;
+    * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f;
+}
+
+/* Approximation of square root                                          */
+/* Accuracy: < +/- 10%  for output values > 15                           */
+/*           < +/- 2.5% for output values > 120                          */
+static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x )
+{
+    opus_int32 y, lz, frac_Q7;
+
+    if( x <= 0 ) {
+        return 0;
+    }
+
+    silk_CLZ_FRAC(x, &lz, &frac_Q7);
+
+    if( lz & 1 ) {
+        y = 32768;
+    } else {
+        y = 46214;        /* 46214 = sqrt(2) * 32768 */
+    }
+
+    /* get scaling right */
+    y >>= silk_RSHIFT(lz, 1);
+
+    /* increment using fractional part of input */
+    y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7));
+
+    return y;
+}
+
+/* Divide two int32 values and return result as int32 in a given Q-domain */
+static OPUS_INLINE opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation of "(a32 << Qres) / b32" */
+    const opus_int32     a32,               /* I    numerator (Q0)                  */
+    const opus_int32     b32,               /* I    denominator (Q0)                */
+    const opus_int       Qres               /* I    Q-domain of result (>= 0)       */
+)
+{
+    opus_int   a_headrm, b_headrm, lshift;
+    opus_int32 b32_inv, a32_nrm, b32_nrm, result;
+
+    silk_assert( b32 != 0 );
+    silk_assert( Qres >= 0 );
+
+    /* Compute number of bits head room and normalize inputs */
+    a_headrm = silk_CLZ32( silk_abs(a32) ) - 1;
+    a32_nrm = silk_LSHIFT(a32, a_headrm);                                       /* Q: a_headrm                  */
+    b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+    b32_nrm = silk_LSHIFT(b32, b_headrm);                                       /* Q: b_headrm                  */
+
+    /* Inverse of b32, with 14 bits of precision */
+    b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) );   /* Q: 29 + 16 - b_headrm        */
+
+    /* First approximation */
+    result = silk_SMULWB(a32_nrm, b32_inv);                                     /* Q: 29 + a_headrm - b_headrm  */
+
+    /* Compute residual by subtracting product of denominator and first approximation */
+    /* It's OK to overflow because the final value of a32_nrm should always be small */
+    a32_nrm = silk_SUB32_ovflw(a32_nrm, silk_LSHIFT_ovflw( silk_SMMUL(b32_nrm, result), 3 ));  /* Q: a_headrm   */
+
+    /* Refinement */
+    result = silk_SMLAWB(result, a32_nrm, b32_inv);                             /* Q: 29 + a_headrm - b_headrm  */
+
+    /* Convert to Qres domain */
+    lshift = 29 + a_headrm - b_headrm - Qres;
+    if( lshift < 0 ) {
+        return silk_LSHIFT_SAT32(result, -lshift);
+    } else {
+        if( lshift < 32){
+            return silk_RSHIFT(result, lshift);
+        } else {
+            /* Avoid undefined result */
+            return 0;
+        }
+    }
+}
+
+/* Invert int32 value and return result as int32 in a given Q-domain */
+static OPUS_INLINE opus_int32 silk_INVERSE32_varQ(   /* O    returns a good approximation of "(1 << Qres) / b32" */
+    const opus_int32     b32,                   /* I    denominator (Q0)                */
+    const opus_int       Qres                   /* I    Q-domain of result (> 0)        */
+)
+{
+    opus_int   b_headrm, lshift;
+    opus_int32 b32_inv, b32_nrm, err_Q32, result;
+
+    silk_assert( b32 != 0 );
+    silk_assert( Qres > 0 );
+
+    /* Compute number of bits head room and normalize input */
+    b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+    b32_nrm = silk_LSHIFT(b32, b_headrm);                                       /* Q: b_headrm                */
+
+    /* Inverse of b32, with 14 bits of precision */
+    b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) );   /* Q: 29 + 16 - b_headrm    */
+
+    /* First approximation */
+    result = silk_LSHIFT(b32_inv, 16);                                          /* Q: 61 - b_headrm            */
+
+    /* Compute residual by subtracting product of denominator and first approximation from one */
+    err_Q32 = silk_LSHIFT( ((opus_int32)1<<29) - silk_SMULWB(b32_nrm, b32_inv), 3 );        /* Q32                        */
+
+    /* Refinement */
+    result = silk_SMLAWW(result, err_Q32, b32_inv);                             /* Q: 61 - b_headrm            */
+
+    /* Convert to Qres domain */
+    lshift = 61 - b_headrm - Qres;
+    if( lshift <= 0 ) {
+        return silk_LSHIFT_SAT32(result, -lshift);
+    } else {
+        if( lshift < 32){
+            return silk_RSHIFT(result, lshift);
+        }else{
+            /* Avoid undefined result */
+            return 0;
+        }
+    }
+}
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_FIX_INLINES_H */
diff --git a/drivers/opus/silk/LPC_analysis_filter.c b/drivers/opus/silk/LPC_analysis_filter.c
new file mode 100644
index 0000000000..98ef509e4e
--- /dev/null
+++ b/drivers/opus/silk/LPC_analysis_filter.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "celt_lpc.h"
+
+/*******************************************/
+/* LPC analysis filter                     */
+/* NB! State is kept internally and the    */
+/* filter always starts with zero state    */
+/* first d output samples are set to zero  */
+/*******************************************/
+
+void silk_LPC_analysis_filter(
+    opus_int16                  *out,               /* O    Output signal                                               */
+    const opus_int16            *in,                /* I    Input signal                                                */
+    const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
+    const opus_int32            len,                /* I    Signal length                                               */
+    const opus_int32            d                   /* I    Filter order                                                */
+)
+{
+    opus_int   j;
+#ifdef OPUS_FIXED_POINT
+    opus_int16 mem[SILK_MAX_ORDER_LPC];
+    opus_int16 num[SILK_MAX_ORDER_LPC];
+#else
+    int ix;
+    opus_int32       out32_Q12, out32;
+    const opus_int16 *in_ptr;
+#endif
+
+    silk_assert( d >= 6 );
+    silk_assert( (d & 1) == 0 );
+    silk_assert( d <= len );
+
+#ifdef OPUS_FIXED_POINT
+    silk_assert( d <= SILK_MAX_ORDER_LPC );
+    for ( j = 0; j < d; j++ ) {
+        num[ j ] = -B[ j ];
+    }
+    for (j=0;j<d;j++) {
+        mem[ j ] = in[ d - j - 1 ];
+    }
+    celt_fir( in + d, num, out + d, len - d, d, mem );
+    for ( j = 0; j < d; j++ ) {
+        out[ j ] = 0;
+    }
+#else
+    for( ix = d; ix < len; ix++ ) {
+        in_ptr = &in[ ix - 1 ];
+
+        out32_Q12 = silk_SMULBB( in_ptr[  0 ], B[ 0 ] );
+        /* Allowing wrap around so that two wraps can cancel each other. The rare
+           cases where the result wraps around can only be triggered by invalid streams*/
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -1 ], B[ 1 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -2 ], B[ 2 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -3 ], B[ 3 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -4 ], B[ 4 ] );
+        out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -5 ], B[ 5 ] );
+        for( j = 6; j < d; j += 2 ) {
+            out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j     ], B[ j     ] );
+            out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j - 1 ], B[ j + 1 ] );
+        }
+
+        /* Subtract prediction */
+        out32_Q12 = silk_SUB32_ovflw( silk_LSHIFT( (opus_int32)in_ptr[ 1 ], 12 ), out32_Q12 );
+
+        /* Scale to Q0 */
+        out32 = silk_RSHIFT_ROUND( out32_Q12, 12 );
+
+        /* Saturate output */
+        out[ ix ] = (opus_int16)silk_SAT16( out32 );
+    }
+
+    /* Set first d output samples to zero */
+    silk_memset( out, 0, d * sizeof( opus_int16 ) );
+#endif
+}
diff --git a/drivers/opus/silk/LPC_inv_pred_gain.c b/drivers/opus/silk/LPC_inv_pred_gain.c
new file mode 100644
index 0000000000..6dc9a49861
--- /dev/null
+++ b/drivers/opus/silk/LPC_inv_pred_gain.c
@@ -0,0 +1,154 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+#define QA                          24
+#define A_LIMIT                     SILK_FIX_CONST( 0.99975, QA )
+
+#define MUL32_FRAC_Q(a32, b32, Q)   ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))
+
+/* Compute inverse of LPC prediction gain, and                          */
+/* test if LPC coefficients are stable (all poles within unit circle)   */
+static opus_int32 LPC_inverse_pred_gain_QA(                 /* O   Returns inverse prediction gain in energy domain, Q30    */
+    opus_int32           A_QA[ 2 ][ SILK_MAX_ORDER_LPC ],   /* I   Prediction coefficients                                  */
+    const opus_int       order                              /* I   Prediction order                                         */
+)
+{
+    opus_int   k, n, mult2Q;
+    opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA;
+    opus_int32 *Aold_QA, *Anew_QA;
+
+    Anew_QA = A_QA[ order & 1 ];
+
+    invGain_Q30 = (opus_int32)1 << 30;
+    for( k = order - 1; k > 0; k-- ) {
+        /* Check for stability */
+        if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
+            return 0;
+        }
+
+        /* Set RC equal to negated AR coef */
+        rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA );
+
+        /* rc_mult1_Q30 range: [ 1 : 2^30 ] */
+        rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        silk_assert( rc_mult1_Q30 > ( 1 << 15 ) );                   /* reduce A_LIMIT if fails */
+        silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
+
+        /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
+        mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
+        rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
+
+        /* Update inverse gain */
+        /* invGain_Q30 range: [ 0 : 2^30 ] */
+        invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+        silk_assert( invGain_Q30 >= 0           );
+        silk_assert( invGain_Q30 <= ( 1 << 30 ) );
+
+        /* Swap pointers */
+        Aold_QA = Anew_QA;
+        Anew_QA = A_QA[ k & 1 ];
+
+        /* Update AR coefficient */
+        for( n = 0; n < k; n++ ) {
+            tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 );
+            Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q );
+        }
+    }
+
+    /* Check for stability */
+    if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
+        return 0;
+    }
+
+    /* Set RC equal to negated AR coef */
+    rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA );
+
+    /* Range: [ 1 : 2^30 ] */
+    rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+
+    /* Update inverse gain */
+    /* Range: [ 0 : 2^30 ] */
+    invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+    silk_assert( invGain_Q30 >= 0     );
+    silk_assert( invGain_Q30 <= 1<<30 );
+
+    return invGain_Q30;
+}
+
+/* For input in Q12 domain */
+opus_int32 silk_LPC_inverse_pred_gain(              /* O   Returns inverse prediction gain in energy domain, Q30        */
+    const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
+    const opus_int              order               /* I   Prediction order                                             */
+)
+{
+    opus_int   k;
+    opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *Anew_QA;
+    opus_int32 DC_resp = 0;
+
+    Anew_QA = Atmp_QA[ order & 1 ];
+
+    /* Increase Q domain of the AR coefficients */
+    for( k = 0; k < order; k++ ) {
+        DC_resp += (opus_int32)A_Q12[ k ];
+        Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
+    }
+    /* If the DC is unstable, we don't even need to do the full calculations */
+    if( DC_resp >= 4096 ) {
+        return 0;
+    }
+    return LPC_inverse_pred_gain_QA( Atmp_QA, order );
+}
+
+#ifdef OPUS_FIXED_POINT
+
+/* For input in Q24 domain */
+opus_int32 silk_LPC_inverse_pred_gain_Q24(          /* O    Returns inverse prediction gain in energy domain, Q30       */
+    const opus_int32            *A_Q24,             /* I    Prediction coefficients [order]                             */
+    const opus_int              order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k;
+    opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *Anew_QA;
+
+    Anew_QA = Atmp_QA[ order & 1 ];
+
+    /* Increase Q domain of the AR coefficients */
+    for( k = 0; k < order; k++ ) {
+        Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA );
+    }
+
+    return LPC_inverse_pred_gain_QA( Atmp_QA, order );
+}
+#endif
diff --git a/drivers/opus/silk/LP_variable_cutoff.c b/drivers/opus/silk/LP_variable_cutoff.c
new file mode 100644
index 0000000000..098c19d34f
--- /dev/null
+++ b/drivers/opus/silk/LP_variable_cutoff.c
@@ -0,0 +1,135 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/*
+    Elliptic/Cauer filters designed with 0.1 dB passband ripple,
+    80 dB minimum stopband attenuation, and
+    [0.95 : 0.15 : 0.35] normalized cut off frequencies.
+*/
+
+#include "silk_main.h"
+
+/* Helper function, interpolates the filter taps */
+static OPUS_INLINE void silk_LP_interpolate_filter_taps(
+    opus_int32           B_Q28[ TRANSITION_NB ],
+    opus_int32           A_Q28[ TRANSITION_NA ],
+    const opus_int       ind,
+    const opus_int32     fac_Q16
+)
+{
+    opus_int nb, na;
+
+    if( ind < TRANSITION_INT_NUM - 1 ) {
+        if( fac_Q16 > 0 ) {
+            if( fac_Q16 < 32768 ) { /* fac_Q16 is in range of a 16-bit int */
+                /* Piece-wise linear interpolation of B and A */
+                for( nb = 0; nb < TRANSITION_NB; nb++ ) {
+                    B_Q28[ nb ] = silk_SMLAWB(
+                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
+                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] -
+                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
+                        fac_Q16 );
+                }
+                for( na = 0; na < TRANSITION_NA; na++ ) {
+                    A_Q28[ na ] = silk_SMLAWB(
+                        silk_Transition_LP_A_Q28[ ind     ][ na ],
+                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ] -
+                        silk_Transition_LP_A_Q28[ ind     ][ na ],
+                        fac_Q16 );
+                }
+            } else { /* ( fac_Q16 - ( 1 << 16 ) ) is in range of a 16-bit int */
+                silk_assert( fac_Q16 - ( 1 << 16 ) == silk_SAT16( fac_Q16 - ( 1 << 16 ) ) );
+                /* Piece-wise linear interpolation of B and A */
+                for( nb = 0; nb < TRANSITION_NB; nb++ ) {
+                    B_Q28[ nb ] = silk_SMLAWB(
+                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ],
+                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] -
+                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
+                        fac_Q16 - ( (opus_int32)1 << 16 ) );
+                }
+                for( na = 0; na < TRANSITION_NA; na++ ) {
+                    A_Q28[ na ] = silk_SMLAWB(
+                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ],
+                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ] -
+                        silk_Transition_LP_A_Q28[ ind     ][ na ],
+                        fac_Q16 - ( (opus_int32)1 << 16 ) );
+                }
+            }
+        } else {
+            silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ ind ], TRANSITION_NB * sizeof( opus_int32 ) );
+            silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ ind ], TRANSITION_NA * sizeof( opus_int32 ) );
+        }
+    } else {
+        silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NB * sizeof( opus_int32 ) );
+        silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NA * sizeof( opus_int32 ) );
+    }
+}
+
+/* Low-pass filter with variable cutoff frequency based on  */
+/* piece-wise linear interpolation between elliptic filters */
+/* Start by setting psEncC->mode <> 0;                      */
+/* Deactivate by setting psEncC->mode = 0;                  */
+void silk_LP_variable_cutoff(
+    silk_LP_state               *psLP,                          /* I/O  LP filter state                             */
+    opus_int16                  *frame,                         /* I/O  Low-pass filtered output signal             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+)
+{
+    opus_int32   B_Q28[ TRANSITION_NB ], A_Q28[ TRANSITION_NA ], fac_Q16 = 0;
+    opus_int     ind = 0;
+
+    silk_assert( psLP->transition_frame_no >= 0 && psLP->transition_frame_no <= TRANSITION_FRAMES );
+
+    /* Run filter if needed */
+    if( psLP->mode != 0 ) {
+        /* Calculate index and interpolation factor for interpolation */
+#if( TRANSITION_INT_STEPS == 64 )
+        fac_Q16 = silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 - 6 );
+#else
+        fac_Q16 = silk_DIV32_16( silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 ), TRANSITION_FRAMES );
+#endif
+        ind      = silk_RSHIFT( fac_Q16, 16 );
+        fac_Q16 -= silk_LSHIFT( ind, 16 );
+
+        silk_assert( ind >= 0 );
+        silk_assert( ind < TRANSITION_INT_NUM );
+
+        /* Interpolate filter coefficients */
+        silk_LP_interpolate_filter_taps( B_Q28, A_Q28, ind, fac_Q16 );
+
+        /* Update transition frame number for next frame */
+        psLP->transition_frame_no = silk_LIMIT( psLP->transition_frame_no + psLP->mode, 0, TRANSITION_FRAMES );
+
+        /* ARMA low-pass filtering */
+        silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 );
+        silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1);
+    }
+}
diff --git a/drivers/opus/silk/MacroCount.h b/drivers/opus/silk/MacroCount.h
new file mode 100644
index 0000000000..834817d058
--- /dev/null
+++ b/drivers/opus/silk/MacroCount.h
@@ -0,0 +1,718 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SIGPROCFIX_API_MACROCOUNT_H
+#define SIGPROCFIX_API_MACROCOUNT_H
+#include <stdio.h>
+
+#ifdef    silk_MACRO_COUNT
+#define varDefine opus_int64 ops_count = 0;
+
+extern opus_int64 ops_count;
+
+static OPUS_INLINE opus_int64 silk_SaveCount(){
+    return(ops_count);
+}
+
+static OPUS_INLINE opus_int64 silk_SaveResetCount(){
+    opus_int64 ret;
+
+    ret = ops_count;
+    ops_count = 0;
+    return(ret);
+}
+
+static OPUS_INLINE silk_PrintCount(){
+    printf("ops_count = %d \n ", (opus_int32)ops_count);
+}
+
+#undef silk_MUL
+static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = a32 * b32;
+    return ret;
+}
+
+#undef silk_MUL_uint
+static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){
+    opus_uint32 ret;
+    ops_count += 4;
+    ret = a32 * b32;
+    return ret;
+}
+#undef silk_MLA
+static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = a32 + b32 * c32;
+    return ret;
+}
+
+#undef silk_MLA_uint
+static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){
+    opus_uint32 ret;
+    ops_count += 4;
+    ret = a32 + b32 * c32;
+    return ret;
+}
+
+#undef silk_SMULWB
+static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 5;
+    ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
+    return ret;
+}
+#undef    silk_SMLAWB
+static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 5;
+    ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)));
+    return ret;
+}
+
+#undef silk_SMULWT
+static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
+    return ret;
+}
+#undef silk_SMLAWT
+static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
+    return ret;
+}
+
+#undef silk_SMULBB
+static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32);
+    return ret;
+}
+#undef silk_SMLABB
+static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
+    return ret;
+}
+
+#undef silk_SMULBT
+static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){
+    opus_int32 ret;
+    ops_count += 4;
+    ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16);
+    return ret;
+}
+
+#undef silk_SMLABT
+static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
+    return ret;
+}
+
+#undef silk_SMULTT
+static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = (a32 >> 16) * (b32 >> 16);
+    return ret;
+}
+
+#undef    silk_SMLATT
+static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a32 + (b32 >> 16) * (c32 >> 16);
+    return ret;
+}
+
+
+/* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/
+#undef    silk_MLA_ovflw
+#define silk_MLA_ovflw silk_MLA
+
+#undef silk_SMLABB_ovflw
+#define silk_SMLABB_ovflw silk_SMLABB
+
+#undef silk_SMLABT_ovflw
+#define silk_SMLABT_ovflw silk_SMLABT
+
+#undef silk_SMLATT_ovflw
+#define silk_SMLATT_ovflw silk_SMLATT
+
+#undef silk_SMLAWB_ovflw
+#define silk_SMLAWB_ovflw silk_SMLAWB
+
+#undef silk_SMLAWT_ovflw
+#define silk_SMLAWT_ovflw silk_SMLAWT
+
+#undef silk_SMULL
+static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){
+    opus_int64 ret;
+    ops_count += 8;
+    ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32));
+    return ret;
+}
+
+#undef    silk_SMLAL
+static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){
+    opus_int64 ret;
+    ops_count += 8;
+    ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32));
+    return ret;
+}
+#undef    silk_SMLALBB
+static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){
+    opus_int64 ret;
+    ops_count += 4;
+    ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16));
+    return ret;
+}
+
+#undef    SigProcFIX_CLZ16
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16)
+{
+    opus_int32 out32 = 0;
+    ops_count += 10;
+    if( in16 == 0 ) {
+        return 16;
+    }
+    /* test nibbles */
+    if( in16 & 0xFF00 ) {
+        if( in16 & 0xF000 ) {
+            in16 >>= 12;
+        } else {
+            out32 += 4;
+            in16 >>= 8;
+        }
+    } else {
+        if( in16 & 0xFFF0 ) {
+            out32 += 8;
+            in16 >>= 4;
+        } else {
+            out32 += 12;
+        }
+    }
+    /* test bits and return */
+    if( in16 & 0xC ) {
+        if( in16 & 0x8 )
+            return out32 + 0;
+        else
+            return out32 + 1;
+    } else {
+        if( in16 & 0xE )
+            return out32 + 2;
+        else
+            return out32 + 3;
+    }
+}
+
+#undef SigProcFIX_CLZ32
+static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32)
+{
+    /* test highest 16 bits and convert to opus_int16 */
+    ops_count += 2;
+    if( in32 & 0xFFFF0000 ) {
+        return SigProcFIX_CLZ16((opus_int16)(in32 >> 16));
+    } else {
+        return SigProcFIX_CLZ16((opus_int16)in32) + 16;
+    }
+}
+
+#undef silk_DIV32
+static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){
+    ops_count += 64;
+    return a32 / b32;
+}
+
+#undef silk_DIV32_16
+static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){
+    ops_count += 32;
+    return a32 / b32;
+}
+
+#undef silk_SAT8
+static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){
+    opus_int8 tmp;
+    ops_count += 1;
+    tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX  : \
+                    ((a) < silk_int8_MIN ? silk_int8_MIN  : (a)));
+    return(tmp);
+}
+
+#undef silk_SAT16
+static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){
+    opus_int16 tmp;
+    ops_count += 1;
+    tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX  : \
+                     ((a) < silk_int16_MIN ? silk_int16_MIN  : (a)));
+    return(tmp);
+}
+#undef silk_SAT32
+static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){
+    opus_int32 tmp;
+    ops_count += 1;
+    tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX  : \
+                     ((a) < silk_int32_MIN ? silk_int32_MIN  : (a)));
+    return(tmp);
+}
+#undef silk_POS_SAT32
+static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){
+    opus_int32 tmp;
+    ops_count += 1;
+    tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a));
+    return(tmp);
+}
+
+#undef silk_ADD_POS_SAT8
+static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){
+    opus_int8 tmp;
+    ops_count += 1;
+    tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX  : ((a)+(b)));
+    return(tmp);
+}
+#undef silk_ADD_POS_SAT16
+static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){
+    opus_int16 tmp;
+    ops_count += 1;
+    tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b)));
+    return(tmp);
+}
+
+#undef silk_ADD_POS_SAT32
+static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){
+    opus_int32 tmp;
+    ops_count += 1;
+    tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b)));
+    return(tmp);
+}
+
+#undef silk_ADD_POS_SAT64
+static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){
+    opus_int64 tmp;
+    ops_count += 1;
+    tmp = ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)));
+    return(tmp);
+}
+
+#undef    silk_LSHIFT8
+static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){
+    opus_int8 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+#undef    silk_LSHIFT16
+static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){
+    opus_int16 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+#undef    silk_LSHIFT32
+static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+#undef    silk_LSHIFT64
+static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){
+    ops_count += 1;
+    return a << shift;
+}
+
+#undef    silk_LSHIFT_ovflw
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){
+    ops_count += 1;
+    return a << shift;
+}
+
+#undef    silk_LSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){
+    opus_uint32 ret;
+    ops_count += 1;
+    ret = a << shift;
+    return ret;
+}
+
+#undef    silk_RSHIFT8
+static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+#undef    silk_RSHIFT16
+static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+#undef    silk_RSHIFT32
+static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+#undef    silk_RSHIFT64
+static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+
+#undef    silk_RSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){
+    ops_count += 1;
+    return a >> shift;
+}
+
+#undef    silk_ADD_LSHIFT
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_ADD_LSHIFT32
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_ADD_LSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+    opus_uint32 ret;
+    ops_count += 1;
+    ret = a + (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_ADD_RSHIFT
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+#undef    silk_ADD_RSHIFT32
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+#undef    silk_ADD_RSHIFT_uint
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){
+    opus_uint32 ret;
+    ops_count += 1;
+    ret = a + (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+#undef    silk_SUB_LSHIFT32
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a - (b << shift);
+    return ret;                /* shift >= 0*/
+}
+#undef    silk_SUB_RSHIFT32
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a - (b >> shift);
+    return ret;                /* shift  > 0*/
+}
+
+#undef    silk_RSHIFT_ROUND
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){
+    opus_int32 ret;
+    ops_count += 3;
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    return ret;
+}
+
+#undef    silk_RSHIFT_ROUND64
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){
+    opus_int64 ret;
+    ops_count += 6;
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    return ret;
+}
+
+#undef    silk_abs_int64
+static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){
+    ops_count += 1;
+    return (((a) >  0)  ? (a) : -(a));            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/
+}
+
+#undef    silk_abs_int32
+static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){
+    ops_count += 1;
+    return silk_abs(a);
+}
+
+
+#undef silk_min
+static silk_min(a, b){
+    ops_count += 1;
+    return (((a) < (b)) ? (a) :  (b));
+}
+#undef silk_max
+static silk_max(a, b){
+    ops_count += 1;
+    return (((a) > (b)) ? (a) :  (b));
+}
+#undef silk_sign
+static silk_sign(a){
+    ops_count += 1;
+    return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ));
+}
+
+#undef    silk_ADD16
+static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){
+    opus_int16 ret;
+    ops_count += 1;
+    ret = a + b;
+    return ret;
+}
+
+#undef    silk_ADD32
+static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a + b;
+    return ret;
+}
+
+#undef    silk_ADD64
+static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){
+    opus_int64 ret;
+    ops_count += 2;
+    ret = a + b;
+    return ret;
+}
+
+#undef    silk_SUB16
+static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){
+    opus_int16 ret;
+    ops_count += 1;
+    ret = a - b;
+    return ret;
+}
+
+#undef    silk_SUB32
+static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){
+    opus_int32 ret;
+    ops_count += 1;
+    ret = a - b;
+    return ret;
+}
+
+#undef    silk_SUB64
+static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){
+    opus_int64 ret;
+    ops_count += 2;
+    ret = a - b;
+    return ret;
+}
+
+#undef silk_ADD_SAT16
+static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) {
+    opus_int16 res;
+    /* Nb will be counted in AKP_add32 and silk_SAT16*/
+    res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
+    return res;
+}
+
+#undef silk_ADD_SAT32
+static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){
+    opus_int32 res;
+    ops_count += 1;
+    res =    ((((a32) + (b32)) & 0x80000000) == 0 ?                                    \
+            ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) :    \
+            ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) );
+    return res;
+}
+
+#undef silk_ADD_SAT64
+static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) {
+    opus_int64 res;
+    ops_count += 1;
+    res =    ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                \
+            ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) :    \
+            ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) );
+    return res;
+}
+
+#undef silk_SUB_SAT16
+static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) {
+    opus_int16 res;
+    silk_assert(0);
+    /* Nb will be counted in sub-macros*/
+    res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
+    return res;
+}
+
+#undef silk_SUB_SAT32
+static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) {
+    opus_int32 res;
+    ops_count += 1;
+    res =     ((((a32)-(b32)) & 0x80000000) == 0 ?                                            \
+            (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) :    \
+            ((((a32)^0x80000000) & (b32)  & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) );
+    return res;
+}
+
+#undef silk_SUB_SAT64
+static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) {
+    opus_int64 res;
+    ops_count += 1;
+    res =    ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                        \
+            (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) :    \
+            ((((a64)^0x8000000000000000LL) & (b64)  & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) );
+
+    return res;
+}
+
+#undef    silk_SMULWW
+static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){
+    opus_int32 ret;
+    /* Nb will be counted in sub-macros*/
+    ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16));
+    return ret;
+}
+
+#undef    silk_SMLAWW
+static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){
+    opus_int32 ret;
+    /* Nb will be counted in sub-macros*/
+    ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16));
+    return ret;
+}
+
+#undef    silk_min_int
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+
+#undef    silk_min_16
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+#undef    silk_min_32
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+#undef    silk_min_64
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+{
+    ops_count += 1;
+    return (((a) < (b)) ? (a) : (b));
+}
+
+/* silk_min() versions with typecast in the function call */
+#undef    silk_max_int
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+#undef    silk_max_16
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+#undef    silk_max_32
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+
+#undef    silk_max_64
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+{
+    ops_count += 1;
+    return (((a) > (b)) ? (a) : (b));
+}
+
+
+#undef silk_LIMIT_int
+static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2)
+{
+    opus_int ret;
+    ops_count += 6;
+
+    ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+        : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
+
+    return(ret);
+}
+
+#undef silk_LIMIT_16
+static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2)
+{
+    opus_int16 ret;
+    ops_count += 6;
+
+    ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+        : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
+
+return(ret);
+}
+
+
+#undef silk_LIMIT_32
+static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2)
+{
+    opus_int32 ret;
+    ops_count += 6;
+
+    ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+        : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))));
+    return(ret);
+}
+
+#else
+#define varDefine
+#define silk_SaveCount()
+
+#endif
+#endif
+
diff --git a/drivers/opus/silk/MacroDebug.h b/drivers/opus/silk/MacroDebug.h
new file mode 100644
index 0000000000..35aedc5c5f
--- /dev/null
+++ b/drivers/opus/silk/MacroDebug.h
@@ -0,0 +1,952 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (C) 2012 Xiph.Org Foundation
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef MACRO_DEBUG_H
+#define MACRO_DEBUG_H
+
+/* Redefine macro functions with extensive assertion in DEBUG mode.
+   As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */
+
+#if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT)
+
+#undef silk_ADD16
+#define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){
+    opus_int16 ret;
+
+    ret = a + b;
+    if ( ret != silk_ADD_SAT16( a, b ) )
+    {
+        fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_ADD32
+#define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){
+    opus_int32 ret;
+
+    ret = a + b;
+    if ( ret != silk_ADD_SAT32( a, b ) )
+    {
+        fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_ADD64
+#define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){
+    opus_int64 ret;
+
+    ret = a + b;
+    if ( ret != silk_ADD_SAT64( a, b ) )
+    {
+        fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SUB16
+#define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){
+    opus_int16 ret;
+
+    ret = a - b;
+    if ( ret != silk_SUB_SAT16( a, b ) )
+    {
+        fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SUB32
+#define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){
+    opus_int32 ret;
+
+    ret = a - b;
+    if ( ret != silk_SUB_SAT32( a, b ) )
+    {
+        fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SUB64
+#define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){
+    opus_int64 ret;
+
+    ret = a - b;
+    if ( ret != silk_SUB_SAT64( a, b ) )
+    {
+        fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_ADD_SAT16
+#define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) {
+    opus_int16 res;
+    res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) );
+    if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) )
+    {
+        fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_ADD_SAT32
+#define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 res;
+    res =   ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ?       \
+            ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \
+            ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) );
+    if ( res != silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) )
+    {
+        fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_ADD_SAT64
+#define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) {
+    opus_int64 res;
+    int        fail = 0;
+    res =   ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ?                                 \
+            ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \
+            ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) );
+    if( res != a64 + b64 ) {
+        /* Check that we saturated to the correct extreme value */
+        if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
+               ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ) )
+        {
+            fail = 1;
+        }
+    } else {
+        /* Saturation not necessary */
+        fail = res != a64 + b64;
+    }
+    if ( fail )
+    {
+        fprintf (stderr, "silk_ADD_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_SUB_SAT16
+#define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) {
+    opus_int16 res;
+    res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) );
+    if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) )
+    {
+        fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_SUB_SAT32
+#define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) {
+    opus_int32 res;
+    res =   ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ?                \
+            (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \
+            ((((a32)^0x80000000) & (b32)  & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) );
+    if ( res != silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) )
+    {
+        fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_SUB_SAT64
+#define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) {
+    opus_int64 res;
+    int        fail = 0;
+    res =   ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ?                                                    \
+            (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \
+            ((((a64)^0x8000000000000000LL) & (b64)  & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) );
+    if( res != a64 - b64 ) {
+        /* Check that we saturated to the correct extreme value */
+        if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) ||
+              ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ))
+        {
+            fail = 1;
+        }
+    } else {
+        /* Saturation not necessary */
+        fail = res != a64 - b64;
+    }
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SUB_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return res;
+}
+
+#undef silk_MUL
+#define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret;
+    opus_int64 ret64;
+    ret = a32 * b32;
+    ret64 = (opus_int64)a32 * (opus_int64)b32;
+    if ( (opus_int64)ret != ret64 )
+    {
+        fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_MUL_uint
+#define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){
+    opus_uint32 ret;
+    ret = a32 * b32;
+    if ( (opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 )
+    {
+        fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_MLA
+#define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + b32 * c32;
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
+    {
+        fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_MLA_uint
+#define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){
+    opus_uint32 ret;
+    ret = a32 + b32 * c32;
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 )
+    {
+        fprintf (stderr, "silk_MLA_uint(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULWB
+#define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret;
+    ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16);
+    if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 )
+    {
+        fprintf (stderr, "silk_SMULWB(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMLAWB
+#define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) );
+    if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) )
+    {
+        fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULWT
+#define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret;
+    ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16);
+    if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 )
+    {
+        fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMLAWT
+#define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16));
+    if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) )
+    {
+        fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULL
+#define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){
+    opus_int64 ret64;
+    int        fail = 0;
+    ret64 = a64 * b64;
+    if( b64 != 0 ) {
+        fail = a64 != (ret64 / b64);
+    } else if( a64 != 0 ) {
+        fail = b64 != (ret64 / a64);
+    }
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SMULL(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret64;
+}
+
+/* no checking needed for silk_SMULBB */
+#undef silk_SMLABB
+#define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32);
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 )
+    {
+        fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* no checking needed for silk_SMULBT */
+#undef silk_SMLABT
+#define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16);
+    if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) )
+    {
+        fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* no checking needed for silk_SMULTT */
+#undef silk_SMLATT
+#define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret;
+    ret = a32 + (b32 >> 16) * (c32 >> 16);
+    if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) )
+    {
+        fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_SMULWW
+#define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    opus_int32 ret, tmp1, tmp2;
+    opus_int64 ret64;
+    int        fail = 0;
+
+    ret  = silk_SMULWB( a32, b32 );
+    tmp1 = silk_RSHIFT_ROUND( b32, 16 );
+    tmp2 = silk_MUL( a32, tmp1 );
+
+    fail |= (opus_int64)tmp2 != (opus_int64) a32 * (opus_int64) tmp1;
+
+    tmp1 = ret;
+    ret  = silk_ADD32( tmp1, tmp2 );
+    fail |= silk_ADD32( tmp1, tmp2 ) != silk_ADD_SAT32( tmp1, tmp2 );
+
+    ret64 = silk_RSHIFT64( silk_SMULL( a32, b32 ), 16 );
+    fail |= (opus_int64)ret != ret64;
+
+    if ( fail )
+    {
+        fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+
+    return ret;
+}
+
+#undef silk_SMLAWW
+#define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){
+    opus_int32 ret, tmp;
+
+    tmp = silk_SMULWW( b32, c32 );
+    ret = silk_ADD32( a32, tmp );
+    if ( ret != silk_ADD_SAT32( a32, tmp ) )
+    {
+        fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
+#undef  silk_MLA_ovflw
+#define silk_MLA_ovflw(a32, b32, c32)    ((a32) + ((b32) * (c32)))
+#undef  silk_SMLABB_ovflw
+#define silk_SMLABB_ovflw(a32, b32, c32)    ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))
+
+/* no checking needed for silk_SMULL
+   no checking needed for silk_SMLAL
+   no checking needed for silk_SMLALBB
+   no checking needed for SigProcFIX_CLZ16
+   no checking needed for SigProcFIX_CLZ32*/
+
+#undef silk_DIV32
+#define silk_DIV32(a,b) silk_DIV32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    if ( b32 == 0 )
+    {
+        fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a32 / b32;
+}
+
+#undef silk_DIV32_16
+#define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){
+    int fail = 0;
+    fail |= b32 == 0;
+    fail |= b32 > silk_int16_MAX;
+    fail |= b32 < silk_int16_MIN;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a32 / b32;
+}
+
+/* no checking needed for silk_SAT8
+   no checking needed for silk_SAT16
+   no checking needed for silk_SAT32
+   no checking needed for silk_POS_SAT32
+   no checking needed for silk_ADD_POS_SAT8
+   no checking needed for silk_ADD_POS_SAT16
+   no checking needed for silk_ADD_POS_SAT32
+   no checking needed for silk_ADD_POS_SAT64 */
+
+#undef silk_LSHIFT8
+#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+    opus_int8 ret;
+    int       fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 8;
+    fail |= (opus_int64)ret != ((opus_int64)a) << shift;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT16
+#define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+    opus_int16 ret;
+    int        fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 16;
+    fail |= (opus_int64)ret != ((opus_int64)a) << shift;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT32
+#define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    int        fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 32;
+    fail |= (opus_int64)ret != ((opus_int64)a) << shift;
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT64
+#define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){
+    opus_int64 ret;
+    int        fail = 0;
+    ret = a << shift;
+    fail |= shift < 0;
+    fail |= shift >= 64;
+    fail |= (ret>>shift) != ((opus_int64)a);
+    if ( fail )
+    {
+        fprintf (stderr, "silk_LSHIFT64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_LSHIFT_ovflw
+#define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */
+    {
+        fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a << shift;
+}
+
+#undef silk_LSHIFT_uint
+#define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a << shift;
+    if ( (shift < 0) || ((opus_int64)ret != ((opus_int64)a) << shift))
+    {
+        fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_RSHIFT8
+#define silk_RSHITF8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=8) )
+    {
+        fprintf (stderr, "silk_RSHITF8(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT16
+#define silk_RSHITF16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=16) )
+    {
+        fprintf (stderr, "silk_RSHITF16(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT32
+#define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=32) )
+    {
+        fprintf (stderr, "silk_RSHITF32(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT64
+#define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){
+    if ( (shift < 0) || (shift>=64) )
+    {
+        fprintf (stderr, "silk_RSHITF64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_RSHIFT_uint
+#define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){
+    if ( (shift < 0) || (shift>32) )
+    {
+        fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return a >> shift;
+}
+
+#undef silk_ADD_LSHIFT
+#define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){
+    opus_int16 ret;
+    ret = a + (b << shift);
+    if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_ADD_LSHIFT32
+#define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a + (b << shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_ADD_LSHIFT_uint
+#define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a + (b << shift);
+    if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_ADD_RSHIFT
+#define silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){
+    opus_int16 ret;
+    ret = a + (b >> shift);
+    if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_ADD_RSHIFT32
+#define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a + (b >> shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_ADD_RSHIFT_uint
+#define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){
+    opus_uint32 ret;
+    ret = a + (b >> shift);
+    if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_SUB_LSHIFT32
+#define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a - (b << shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift)) )
+    {
+        fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift >= 0 */
+}
+
+#undef silk_SUB_RSHIFT32
+#define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = a - (b >> shift);
+    if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) )
+    {
+        fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;                /* shift  > 0 */
+}
+
+#undef silk_RSHIFT_ROUND
+#define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){
+    opus_int32 ret;
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    /* the marco definition can't handle a shift of zero */
+    if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) )
+    {
+        fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return ret;
+}
+
+#undef silk_RSHIFT_ROUND64
+#define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){
+    opus_int64 ret;
+    /* the marco definition can't handle a shift of zero */
+    if ( (shift <= 0) || (shift>=64) )
+    {
+        fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1;
+    return ret;
+}
+
+/* silk_abs is used on floats also, so doesn't work... */
+/*#undef silk_abs
+static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){
+    silk_assert(a != 0x80000000);
+    return (((a) >  0)  ? (a) : -(a));            // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN
+}*/
+
+#undef silk_abs_int64
+#define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){
+    if ( a == silk_int64_MIN )
+    {
+        fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return (((a) >  0)  ? (a) : -(a));            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */
+}
+
+#undef silk_abs_int32
+#define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){
+    if ( a == silk_int32_MIN )
+    {
+        fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return silk_abs(a);
+}
+
+#undef silk_CHECK_FIT8
+#define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){
+    opus_int8 ret;
+    ret = (opus_int8)a;
+    if ( (opus_int64)ret != a )
+    {
+        fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return( ret );
+}
+
+#undef silk_CHECK_FIT16
+#define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){
+    opus_int16 ret;
+    ret = (opus_int16)a;
+    if ( (opus_int64)ret != a )
+    {
+        fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return( ret );
+}
+
+#undef silk_CHECK_FIT32
+#define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__)
+static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){
+    opus_int32 ret;
+    ret = (opus_int32)a;
+    if ( (opus_int64)ret != a )
+    {
+        fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line);
+#ifdef FIXED_DEBUG_ASSERT
+        silk_assert( 0 );
+#endif
+    }
+    return( ret );
+}
+
+/* no checking for silk_NSHIFT_MUL_32_32
+   no checking for silk_NSHIFT_MUL_16_16
+   no checking needed for silk_min
+   no checking needed for silk_max
+   no checking needed for silk_sign
+*/
+
+#endif
+#endif /* MACRO_DEBUG_H */
diff --git a/drivers/opus/silk/NLSF2A.c b/drivers/opus/silk/NLSF2A.c
new file mode 100644
index 0000000000..2b6f685f49
--- /dev/null
+++ b/drivers/opus/silk/NLSF2A.c
@@ -0,0 +1,178 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* conversion between prediction filter coefficients and LSFs   */
+/* order should be even                                         */
+/* a piecewise linear approximation maps LSF <-> cos(LSF)       */
+/* therefore the result is not accurate LSFs, but the two       */
+/* functions are accurate inverses of each other                */
+
+#include "SigProc_FIX.h"
+#include "tables.h"
+
+#define QA      16
+
+/* helper function for NLSF2A(..) */
+static OPUS_INLINE void silk_NLSF2A_find_poly(
+    opus_int32          *out,      /* O    intermediate polynomial, QA [dd+1]        */
+    const opus_int32    *cLSF,     /* I    vector of interleaved 2*cos(LSFs), QA [d] */
+    opus_int            dd         /* I    polynomial order (= 1/2 * filter order)   */
+)
+{
+    opus_int   k, n;
+    opus_int32 ftmp;
+
+    out[0] = silk_LSHIFT( 1, QA );
+    out[1] = -cLSF[0];
+    for( k = 1; k < dd; k++ ) {
+        ftmp = cLSF[2*k];            /* QA*/
+        out[k+1] = silk_LSHIFT( out[k-1], 1 ) - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[k] ), QA );
+        for( n = k; n > 1; n-- ) {
+            out[n] += out[n-2] - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[n-1] ), QA );
+        }
+        out[1] -= ftmp;
+    }
+}
+
+/* compute whitening filter coefficients from normalized line spectral frequencies */
+void silk_NLSF2A(
+    opus_int16                  *a_Q12,             /* O    monic whitening filter coefficients in Q12,  [ d ]          */
+    const opus_int16            *NLSF,              /* I    normalized line spectral frequencies in Q15, [ d ]          */
+    const opus_int              d                   /* I    filter order (should be even)                               */
+)
+{
+    /* This ordering was found to maximize quality. It improves numerical accuracy of
+       silk_NLSF2A_find_poly() compared to "standard" ordering. */
+    static const unsigned char ordering16[16] = {
+      0, 15, 8, 7, 4, 11, 12, 3, 2, 13, 10, 5, 6, 9, 14, 1
+    };
+    static const unsigned char ordering10[10] = {
+      0, 9, 6, 3, 4, 5, 8, 1, 2, 7
+    };
+    const unsigned char *ordering;
+    opus_int   k, i, dd;
+    opus_int32 cos_LSF_QA[ SILK_MAX_ORDER_LPC ];
+    opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
+    opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta;
+    opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ];
+    opus_int32 maxabs, absval, idx=0, sc_Q16;
+
+    silk_assert( LSF_COS_TAB_SZ_FIX == 128 );
+    silk_assert( d==10||d==16 );
+
+    /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */
+    ordering = d == 16 ? ordering16 : ordering10;
+    for( k = 0; k < d; k++ ) {
+        silk_assert(NLSF[k] >= 0 );
+
+        /* f_int on a scale 0-127 (rounded down) */
+        f_int = silk_RSHIFT( NLSF[k], 15 - 7 );
+
+        /* f_frac, range: 0..255 */
+        f_frac = NLSF[k] - silk_LSHIFT( f_int, 15 - 7 );
+
+        silk_assert(f_int >= 0);
+        silk_assert(f_int < LSF_COS_TAB_SZ_FIX );
+
+        /* Read start and end value from table */
+        cos_val = silk_LSFCosTab_FIX_Q12[ f_int ];                /* Q12 */
+        delta   = silk_LSFCosTab_FIX_Q12[ f_int + 1 ] - cos_val;  /* Q12, with a range of 0..200 */
+
+        /* Linear interpolation */
+        cos_LSF_QA[ordering[k]] = silk_RSHIFT_ROUND( silk_LSHIFT( cos_val, 8 ) + silk_MUL( delta, f_frac ), 20 - QA ); /* QA */
+    }
+
+    dd = silk_RSHIFT( d, 1 );
+
+    /* generate even and odd polynomials using convolution */
+    silk_NLSF2A_find_poly( P, &cos_LSF_QA[ 0 ], dd );
+    silk_NLSF2A_find_poly( Q, &cos_LSF_QA[ 1 ], dd );
+
+    /* convert even and odd polynomials to opus_int32 Q12 filter coefs */
+    for( k = 0; k < dd; k++ ) {
+        Ptmp = P[ k+1 ] + P[ k ];
+        Qtmp = Q[ k+1 ] - Q[ k ];
+
+        /* the Ptmp and Qtmp values at this stage need to fit in int32 */
+        a32_QA1[ k ]     = -Qtmp - Ptmp;        /* QA+1 */
+        a32_QA1[ d-k-1 ] =  Qtmp - Ptmp;        /* QA+1 */
+    }
+
+    /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */
+    for( i = 0; i < 10; i++ ) {
+        /* Find maximum absolute value and its index */
+        maxabs = 0;
+        for( k = 0; k < d; k++ ) {
+            absval = silk_abs( a32_QA1[k] );
+            if( absval > maxabs ) {
+                maxabs = absval;
+                idx    = k;
+            }
+        }
+        maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 );                                          /* QA+1 -> Q12 */
+
+        if( maxabs > silk_int16_MAX ) {
+            /* Reduce magnitude of prediction coefficients */
+            maxabs = silk_min( maxabs, 163838 );  /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */
+            sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ),
+                                        silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) );
+            silk_bwexpander_32( a32_QA1, d, sc_Q16 );
+        } else {
+            break;
+        }
+    }
+
+    if( i == 10 ) {
+        /* Reached the last iteration, clip the coefficients */
+        for( k = 0; k < d; k++ ) {
+            a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) );  /* QA+1 -> Q12 */
+            a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 );
+        }
+    } else {
+        for( k = 0; k < d; k++ ) {
+            a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 );                /* QA+1 -> Q12 */
+        }
+    }
+
+    for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) {
+        if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
+            /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion   */
+            /* on the unscaled coefficients, convert to Q12 and measure again                   */
+            silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) );
+            for( k = 0; k < d; k++ ) {
+                a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 );            /* QA+1 -> Q12 */
+            }
+        } else {
+            break;
+        }
+    }
+}
+
diff --git a/drivers/opus/silk/NLSF_VQ.c b/drivers/opus/silk/NLSF_VQ.c
new file mode 100644
index 0000000000..e4ca79fbfe
--- /dev/null
+++ b/drivers/opus/silk/NLSF_VQ.c
@@ -0,0 +1,68 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
+void silk_NLSF_VQ(
+    opus_int32                  err_Q26[],                      /* O    Quantization errors [K]                     */
+    const opus_int16            in_Q15[],                       /* I    Input vectors to be quantized [LPC_order]   */
+    const opus_uint8            pCB_Q8[],                       /* I    Codebook vectors [K*LPC_order]              */
+    const opus_int              K,                              /* I    Number of codebook vectors                  */
+    const opus_int              LPC_order                       /* I    Number of LPCs                              */
+)
+{
+    opus_int        i, m;
+    opus_int32      diff_Q15, sum_error_Q30, sum_error_Q26;
+
+    silk_assert( LPC_order <= 16 );
+    silk_assert( ( LPC_order & 1 ) == 0 );
+
+    /* Loop over codebook */
+    for( i = 0; i < K; i++ ) {
+        sum_error_Q26 = 0;
+        for( m = 0; m < LPC_order; m += 2 ) {
+            /* Compute weighted squared quantization error for index m */
+            diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
+            sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 );
+
+            /* Compute weighted squared quantization error for index m + 1 */
+            diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/
+            sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 );
+
+            sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 );
+
+            silk_assert( sum_error_Q26 >= 0 );
+            silk_assert( sum_error_Q30 >= 0 );
+        }
+        err_Q26[ i ] = sum_error_Q26;
+    }
+}
diff --git a/drivers/opus/silk/NLSF_VQ_weights_laroia.c b/drivers/opus/silk/NLSF_VQ_weights_laroia.c
new file mode 100644
index 0000000000..f461ba01c0
--- /dev/null
+++ b/drivers/opus/silk/NLSF_VQ_weights_laroia.c
@@ -0,0 +1,80 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "define.h"
+#include "SigProc_FIX.h"
+
+/*
+R. Laroia, N. Phamdo and N. Farvardin, "Robust and Efficient Quantization of Speech LSP
+Parameters Using Structured Vector Quantization", Proc. IEEE Int. Conf. Acoust., Speech,
+Signal Processing, pp. 641-644, 1991.
+*/
+
+/* Laroia low complexity NLSF weights */
+void silk_NLSF_VQ_weights_laroia(
+    opus_int16                  *pNLSFW_Q_OUT,      /* O     Pointer to input vector weights [D]                        */
+    const opus_int16            *pNLSF_Q15,         /* I     Pointer to input vector         [D]                        */
+    const opus_int              D                   /* I     Input vector dimension (even)                              */
+)
+{
+    opus_int   k;
+    opus_int32 tmp1_int, tmp2_int;
+
+    silk_assert( D > 0 );
+    silk_assert( ( D & 1 ) == 0 );
+
+    /* First value */
+    tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 );
+    tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int );
+    tmp2_int = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 );
+    tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int );
+    pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+    silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 );
+
+    /* Main loop */
+    for( k = 1; k < D - 1; k += 2 ) {
+        tmp1_int = silk_max_int( pNLSF_Q15[ k + 1 ] - pNLSF_Q15[ k ], 1 );
+        tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int );
+        pNLSFW_Q_OUT[ k ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+        silk_assert( pNLSFW_Q_OUT[ k ] > 0 );
+
+        tmp2_int = silk_max_int( pNLSF_Q15[ k + 2 ] - pNLSF_Q15[ k + 1 ], 1 );
+        tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int );
+        pNLSFW_Q_OUT[ k + 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+        silk_assert( pNLSFW_Q_OUT[ k + 1 ] > 0 );
+    }
+
+    /* Last value */
+    tmp1_int = silk_max_int( ( 1 << 15 ) - pNLSF_Q15[ D - 1 ], 1 );
+    tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int );
+    pNLSFW_Q_OUT[ D - 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX );
+    silk_assert( pNLSFW_Q_OUT[ D - 1 ] > 0 );
+}
diff --git a/drivers/opus/silk/NLSF_decode.c b/drivers/opus/silk/NLSF_decode.c
new file mode 100644
index 0000000000..786a62d278
--- /dev/null
+++ b/drivers/opus/silk/NLSF_decode.c
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Predictive dequantizer for NLSF residuals */
+static OPUS_INLINE void silk_NLSF_residual_dequant(               /* O    Returns RD value in Q30                     */
+          opus_int16         x_Q10[],                        /* O    Output [ order ]                            */
+    const opus_int8          indices[],                      /* I    Quantization indices [ order ]              */
+    const opus_uint8         pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int           quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16         order                           /* I    Number of input values                      */
+)
+{
+    opus_int     i, out_Q10, pred_Q10;
+
+    out_Q10 = 0;
+    for( i = order-1; i >= 0; i-- ) {
+        pred_Q10 = silk_RSHIFT( silk_SMULBB( out_Q10, (opus_int16)pred_coef_Q8[ i ] ), 8 );
+        out_Q10  = silk_LSHIFT( indices[ i ], 10 );
+        if( out_Q10 > 0 ) {
+            out_Q10 = silk_SUB16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        } else if( out_Q10 < 0 ) {
+            out_Q10 = silk_ADD16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+        }
+        out_Q10  = silk_SMLAWB( pred_Q10, (opus_int32)out_Q10, quant_step_size_Q16 );
+        x_Q10[ i ] = out_Q10;
+    }
+}
+
+
+/***********************/
+/* NLSF vector decoder */
+/***********************/
+void silk_NLSF_decode(
+          opus_int16            *pNLSF_Q15,                     /* O    Quantized NLSF vector [ LPC_ORDER ]         */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+    const silk_NLSF_CB_struct   *psNLSF_CB                      /* I    Codebook object                             */
+)
+{
+    opus_int         i;
+    opus_uint8       pred_Q8[  MAX_LPC_ORDER ];
+    opus_int16       ec_ix[    MAX_LPC_ORDER ];
+    opus_int16       res_Q10[  MAX_LPC_ORDER ];
+    opus_int16       W_tmp_QW[ MAX_LPC_ORDER ];
+    opus_int32       W_tmp_Q9, NLSF_Q15_tmp;
+    const opus_uint8 *pCB_element;
+
+    /* Decode first stage */
+    pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
+    for( i = 0; i < psNLSF_CB->order; i++ ) {
+        pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 );
+    }
+
+    /* Unpack entropy table indices and predictor for current CB1 index */
+    silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );
+
+    /* Predictive residual dequantizer */
+    silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order );
+
+    /* Weights from codebook vector */
+    silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order );
+
+    /* Apply inverse square-rooted weights and add to output */
+    for( i = 0; i < psNLSF_CB->order; i++ ) {
+        W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
+        NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) );
+        pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 );
+    }
+
+    /* NLSF stabilization */
+    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
+}
diff --git a/drivers/opus/silk/NLSF_del_dec_quant.c b/drivers/opus/silk/NLSF_del_dec_quant.c
new file mode 100644
index 0000000000..b74585370c
--- /dev/null
+++ b/drivers/opus/silk/NLSF_del_dec_quant.c
@@ -0,0 +1,207 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Delayed-decision quantizer for NLSF residuals */
+opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns RD value in Q25                     */
+    opus_int8                   indices[],                      /* O    Quantization indices [ order ]              */
+    const opus_int16            x_Q10[],                        /* I    Input [ order ]                             */
+    const opus_int16            w_Q5[],                         /* I    Weights [ order ]                           */
+    const opus_uint8            pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int16            ec_ix[],                        /* I    Indices to entropy coding tables [ order ]  */
+    const opus_uint8            ec_rates_Q5[],                  /* I    Rates []                                    */
+    const opus_int              quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16            inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
+    const opus_int32            mu_Q20,                         /* I    R/D tradeoff                                */
+    const opus_int16            order                           /* I    Number of input values                      */
+)
+{
+    opus_int         i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
+    opus_int         pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
+    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
+    opus_int         ind_sort[         NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int8        ind[              NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
+    opus_int16       prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_Q25[       2 * NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_min_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_max_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
+    const opus_uint8 *rates_Q5;
+
+    silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */
+
+    nStates = 1;
+    RD_Q25[ 0 ] = 0;
+    prev_out_Q10[ 0 ] = 0;
+    for( i = order - 1; ; i-- ) {
+        rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
+        pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
+        in_Q10 = x_Q10[ i ];
+        for( j = 0; j < nStates; j++ ) {
+            pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
+            res_Q10  = silk_SUB16( in_Q10, pred_Q10 );
+            ind_tmp  = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
+            ind_tmp  = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
+            ind[ j ][ i ] = (opus_int8)ind_tmp;
+
+            /* compute outputs for ind_tmp and ind_tmp + 1 */
+            out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
+            out1_Q10 = silk_ADD16( out0_Q10, 1024 );
+            if( ind_tmp > 0 ) {
+                out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else if( ind_tmp == 0 ) {
+                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else if( ind_tmp == -1 ) {
+                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else {
+                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+                out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            }
+            out0_Q10  = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
+            out1_Q10  = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+            out0_Q10  = silk_ADD16( out0_Q10, pred_Q10 );
+            out1_Q10  = silk_ADD16( out1_Q10, pred_Q10 );
+            prev_out_Q10[ j           ] = out0_Q10;
+            prev_out_Q10[ j + nStates ] = out1_Q10;
+
+            /* compute RD for ind_tmp and ind_tmp + 1 */
+            if( ind_tmp + 1 >= NLSF_QUANT_MAX_AMPLITUDE ) {
+                if( ind_tmp + 1 == NLSF_QUANT_MAX_AMPLITUDE ) {
+                    rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ];
+                    rate1_Q5 = 280;
+                } else {
+                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, 43, ind_tmp );
+                    rate1_Q5 = silk_ADD16( rate0_Q5, 43 );
+                }
+            } else if( ind_tmp <= -NLSF_QUANT_MAX_AMPLITUDE ) {
+                if( ind_tmp == -NLSF_QUANT_MAX_AMPLITUDE ) {
+                    rate0_Q5 = 280;
+                    rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+                } else {
+                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, -43, ind_tmp );
+                    rate1_Q5 = silk_SUB16( rate0_Q5, 43 );
+                }
+            } else {
+                rate0_Q5 = rates_Q5[ ind_tmp +     NLSF_QUANT_MAX_AMPLITUDE ];
+                rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+            }
+            RD_tmp_Q25            = RD_Q25[ j ];
+            diff_Q10              = silk_SUB16( in_Q10, out0_Q10 );
+            RD_Q25[ j ]           = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 );
+            diff_Q10              = silk_SUB16( in_Q10, out1_Q10 );
+            RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
+        }
+
+        if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
+            /* double number of states and copy */
+            for( j = 0; j < nStates; j++ ) {
+                ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
+            }
+            nStates = silk_LSHIFT( nStates, 1 );
+            for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                ind[ j ][ i ] = ind[ j - nStates ][ i ];
+            }
+        } else if( i > 0 ) {
+            /* sort lower and upper half of RD_Q25, pairwise */
+            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
+                    RD_max_Q25[ j ]                         = RD_Q25[ j ];
+                    RD_min_Q25[ j ]                         = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    RD_Q25[ j ]                             = RD_min_Q25[ j ];
+                    RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] = RD_max_Q25[ j ];
+                    /* swap prev_out values */
+                    out0_Q10 = prev_out_Q10[ j ];
+                    prev_out_Q10[ j ] = prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ] = out0_Q10;
+                    ind_sort[ j ] = j + NLSF_QUANT_DEL_DEC_STATES;
+                } else {
+                    RD_min_Q25[ j ] = RD_Q25[ j ];
+                    RD_max_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    ind_sort[ j ] = j;
+                }
+            }
+            /* compare the highest RD values of the winning half with the lowest one in the losing half, and copy if necessary */
+            /* afterwards ind_sort[] will contain the indices of the NLSF_QUANT_DEL_DEC_STATES winning RD values */
+            while( 1 ) {
+                min_max_Q25 = silk_int32_MAX;
+                max_min_Q25 = 0;
+                ind_min_max = 0;
+                ind_max_min = 0;
+                for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                    if( min_max_Q25 > RD_max_Q25[ j ] ) {
+                        min_max_Q25 = RD_max_Q25[ j ];
+                        ind_min_max = j;
+                    }
+                    if( max_min_Q25 < RD_min_Q25[ j ] ) {
+                        max_min_Q25 = RD_min_Q25[ j ];
+                        ind_max_min = j;
+                    }
+                }
+                if( min_max_Q25 >= max_min_Q25 ) {
+                    break;
+                }
+                /* copy ind_min_max to ind_max_min */
+                ind_sort[     ind_max_min ] = ind_sort[     ind_min_max ] ^ NLSF_QUANT_DEL_DEC_STATES;
+                RD_Q25[       ind_max_min ] = RD_Q25[       ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+                prev_out_Q10[ ind_max_min ] = prev_out_Q10[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+                RD_min_Q25[   ind_max_min ] = 0;
+                RD_max_Q25[   ind_min_max ] = silk_int32_MAX;
+                silk_memcpy( ind[ ind_max_min ], ind[ ind_min_max ], MAX_LPC_ORDER * sizeof( opus_int8 ) );
+            }
+            /* increment index if it comes from the upper half */
+            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
+            }
+        } else {  /* i == 0 */
+            break;
+        }
+    }
+
+    /* last sample: find winner, copy indices and return RD value */
+    ind_tmp = 0;
+    min_Q25 = silk_int32_MAX;
+    for( j = 0; j < 2 * NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+        if( min_Q25 > RD_Q25[ j ] ) {
+            min_Q25 = RD_Q25[ j ];
+            ind_tmp = j;
+        }
+    }
+    for( j = 0; j < order; j++ ) {
+        indices[ j ] = ind[ ind_tmp & ( NLSF_QUANT_DEL_DEC_STATES - 1 ) ][ j ];
+        silk_assert( indices[ j ] >= -NLSF_QUANT_MAX_AMPLITUDE_EXT );
+        silk_assert( indices[ j ] <=  NLSF_QUANT_MAX_AMPLITUDE_EXT );
+    }
+    indices[ 0 ] += silk_RSHIFT( ind_tmp, NLSF_QUANT_DEL_DEC_STATES_LOG2 );
+    silk_assert( indices[ 0 ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT );
+    silk_assert( min_Q25 >= 0 );
+    return min_Q25;
+}
diff --git a/drivers/opus/silk/NLSF_encode.c b/drivers/opus/silk/NLSF_encode.c
new file mode 100644
index 0000000000..bf67bd5cf1
--- /dev/null
+++ b/drivers/opus/silk/NLSF_encode.c
@@ -0,0 +1,136 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/***********************/
+/* NLSF vector encoder */
+/***********************/
+opus_int32 silk_NLSF_encode(                                    /* O    Returns RD value in Q25                     */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+          opus_int16            *pNLSF_Q15,                     /* I/O  Quantized NLSF vector [ LPC_ORDER ]         */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int16            *pW_QW,                         /* I    NLSF weight vector [ LPC_ORDER ]            */
+    const opus_int              NLSF_mu_Q20,                    /* I    Rate weight for the RD optimization         */
+    const opus_int              nSurvivors,                     /* I    Max survivors after first stage             */
+    const opus_int              signalType                      /* I    Signal type: 0/1/2                          */
+)
+{
+    opus_int         i, s, ind1, bestIndex, prob_Q8, bits_q7;
+    opus_int32       W_tmp_Q9;
+    VARDECL( opus_int32, err_Q26 );
+    VARDECL( opus_int32, RD_Q25 );
+    VARDECL( opus_int, tempIndices1 );
+    VARDECL( opus_int8, tempIndices2 );
+    opus_int16       res_Q15[      MAX_LPC_ORDER ];
+    opus_int16       res_Q10[      MAX_LPC_ORDER ];
+    opus_int16       NLSF_tmp_Q15[ MAX_LPC_ORDER ];
+    opus_int16       W_tmp_QW[     MAX_LPC_ORDER ];
+    opus_int16       W_adj_Q5[     MAX_LPC_ORDER ];
+    opus_uint8       pred_Q8[      MAX_LPC_ORDER ];
+    opus_int16       ec_ix[        MAX_LPC_ORDER ];
+    const opus_uint8 *pCB_element, *iCDF_ptr;
+    SAVE_STACK;
+
+    silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
+    silk_assert( signalType >= 0 && signalType <= 2 );
+    silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
+
+    /* NLSF stabilization */
+    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
+
+    /* First stage: VQ */
+    ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
+    silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
+
+    /* Sort the quantization errors */
+    ALLOC( tempIndices1, nSurvivors, opus_int );
+    silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
+
+    ALLOC( RD_Q25, nSurvivors, opus_int32 );
+    ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
+
+    /* Loop over survivors */
+    for( s = 0; s < nSurvivors; s++ ) {
+        ind1 = tempIndices1[ s ];
+
+        /* Residual after first stage */
+        pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ];
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 );
+            res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ];
+        }
+
+        /* Weights from codebook vector */
+        silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order );
+
+        /* Apply square-rooted weights */
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
+            res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 );
+        }
+
+        /* Modify input weights accordingly */
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] );
+        }
+
+        /* Unpack entropy table indices and predictor for current CB1 index */
+        silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, ind1 );
+
+        /* Trellis quantizer */
+        RD_Q25[ s ] = silk_NLSF_del_dec_quant( &tempIndices2[ s * MAX_LPC_ORDER ], res_Q10, W_adj_Q5, pred_Q8, ec_ix,
+            psNLSF_CB->ec_Rates_Q5, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->invQuantStepSize_Q6, NLSF_mu_Q20, psNLSF_CB->order );
+
+        /* Add rate for first stage */
+        iCDF_ptr = &psNLSF_CB->CB1_iCDF[ ( signalType >> 1 ) * psNLSF_CB->nVectors ];
+        if( ind1 == 0 ) {
+            prob_Q8 = 256 - iCDF_ptr[ ind1 ];
+        } else {
+            prob_Q8 = iCDF_ptr[ ind1 - 1 ] - iCDF_ptr[ ind1 ];
+        }
+        bits_q7 = ( 8 << 7 ) - silk_lin2log( prob_Q8 );
+        RD_Q25[ s ] = silk_SMLABB( RD_Q25[ s ], bits_q7, silk_RSHIFT( NLSF_mu_Q20, 2 ) );
+    }
+
+    /* Find the lowest rate-distortion error */
+    silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 );
+
+    NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ];
+    silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) );
+
+    /* Decode */
+    silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
+
+    RESTORE_STACK;
+    return RD_Q25[ 0 ];
+}
diff --git a/drivers/opus/silk/NLSF_stabilize.c b/drivers/opus/silk/NLSF_stabilize.c
new file mode 100644
index 0000000000..a1bf20b8d4
--- /dev/null
+++ b/drivers/opus/silk/NLSF_stabilize.c
@@ -0,0 +1,142 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* NLSF stabilizer:                                         */
+/*                                                          */
+/* - Moves NLSFs further apart if they are too close        */
+/* - Moves NLSFs away from borders if they are too close    */
+/* - High effort to achieve a modification with minimum     */
+/*     Euclidean distance to input vector                   */
+/* - Output are sorted NLSF coefficients                    */
+/*                                                          */
+
+#include "SigProc_FIX.h"
+
+/* Constant Definitions */
+#define MAX_LOOPS        20
+
+/* NLSF stabilizer, for a single input data vector */
+void silk_NLSF_stabilize(
+          opus_int16            *NLSF_Q15,          /* I/O   Unstable/stabilized normalized LSF vector in Q15 [L]       */
+    const opus_int16            *NDeltaMin_Q15,     /* I     Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]   */
+    const opus_int              L                   /* I     Number of NLSF parameters in the input vector              */
+)
+{
+    opus_int   i, I=0, k, loops;
+    opus_int16 center_freq_Q15;
+    opus_int32 diff_Q15, min_diff_Q15, min_center_Q15, max_center_Q15;
+
+    /* This is necessary to ensure an output within range of a opus_int16 */
+    silk_assert( NDeltaMin_Q15[L] >= 1 );
+
+    for( loops = 0; loops < MAX_LOOPS; loops++ ) {
+        /**************************/
+        /* Find smallest distance */
+        /**************************/
+        /* First element */
+        min_diff_Q15 = NLSF_Q15[0] - NDeltaMin_Q15[0];
+        I = 0;
+        /* Middle elements */
+        for( i = 1; i <= L-1; i++ ) {
+            diff_Q15 = NLSF_Q15[i] - ( NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
+            if( diff_Q15 < min_diff_Q15 ) {
+                min_diff_Q15 = diff_Q15;
+                I = i;
+            }
+        }
+        /* Last element */
+        diff_Q15 = ( 1 << 15 ) - ( NLSF_Q15[L-1] + NDeltaMin_Q15[L] );
+        if( diff_Q15 < min_diff_Q15 ) {
+            min_diff_Q15 = diff_Q15;
+            I = L;
+        }
+
+        /***************************************************/
+        /* Now check if the smallest distance non-negative */
+        /***************************************************/
+        if( min_diff_Q15 >= 0 ) {
+            return;
+        }
+
+        if( I == 0 ) {
+            /* Move away from lower limit */
+            NLSF_Q15[0] = NDeltaMin_Q15[0];
+
+        } else if( I == L) {
+            /* Move away from higher limit */
+            NLSF_Q15[L-1] = ( 1 << 15 ) - NDeltaMin_Q15[L];
+
+        } else {
+            /* Find the lower extreme for the location of the current center frequency */
+            min_center_Q15 = 0;
+            for( k = 0; k < I; k++ ) {
+                min_center_Q15 += NDeltaMin_Q15[k];
+            }
+            min_center_Q15 += silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+            /* Find the upper extreme for the location of the current center frequency */
+            max_center_Q15 = 1 << 15;
+            for( k = L; k > I; k-- ) {
+                max_center_Q15 -= NDeltaMin_Q15[k];
+            }
+            max_center_Q15 -= silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+            /* Move apart, sorted by value, keeping the same center frequency */
+            center_freq_Q15 = (opus_int16)silk_LIMIT_32( silk_RSHIFT_ROUND( (opus_int32)NLSF_Q15[I-1] + (opus_int32)NLSF_Q15[I], 1 ),
+                min_center_Q15, max_center_Q15 );
+            NLSF_Q15[I-1] = center_freq_Q15 - silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+            NLSF_Q15[I] = NLSF_Q15[I-1] + NDeltaMin_Q15[I];
+        }
+    }
+
+    /* Safe and simple fall back method, which is less ideal than the above */
+    if( loops == MAX_LOOPS )
+    {
+        /* Insertion sort (fast for already almost sorted arrays):   */
+        /* Best case:  O(n)   for an already sorted array            */
+        /* Worst case: O(n^2) for an inversely sorted array          */
+        silk_insertion_sort_increasing_all_values_int16( &NLSF_Q15[0], L );
+
+        /* First NLSF should be no less than NDeltaMin[0] */
+        NLSF_Q15[0] = silk_max_int( NLSF_Q15[0], NDeltaMin_Q15[0] );
+
+        /* Keep delta_min distance between the NLSFs */
+        for( i = 1; i < L; i++ )
+            NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
+
+        /* Last NLSF should be no higher than 1 - NDeltaMin[L] */
+        NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] );
+
+        /* Keep NDeltaMin distance between the NLSFs */
+        for( i = L-2; i >= 0; i-- )
+            NLSF_Q15[i] = silk_min_int( NLSF_Q15[i], NLSF_Q15[i+1] - NDeltaMin_Q15[i+1] );
+    }
+}
diff --git a/drivers/opus/silk/NLSF_unpack.c b/drivers/opus/silk/NLSF_unpack.c
new file mode 100644
index 0000000000..60242a3b52
--- /dev/null
+++ b/drivers/opus/silk/NLSF_unpack.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Unpack predictor values and indices for entropy coding tables */
+void silk_NLSF_unpack(
+          opus_int16            ec_ix[],                        /* O    Indices to entropy tables [ LPC_ORDER ]     */
+          opus_uint8            pred_Q8[],                      /* O    LSF predictor [ LPC_ORDER ]                 */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int              CB1_index                       /* I    Index of vector in first LSF codebook       */
+)
+{
+    opus_int   i;
+    opus_uint8 entry;
+    const opus_uint8 *ec_sel_ptr;
+
+    ec_sel_ptr = &psNLSF_CB->ec_sel[ CB1_index * psNLSF_CB->order / 2 ];
+    for( i = 0; i < psNLSF_CB->order; i += 2 ) {
+        entry = *ec_sel_ptr++;
+        ec_ix  [ i     ] = silk_SMULBB( silk_RSHIFT( entry, 1 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 );
+        pred_Q8[ i     ] = psNLSF_CB->pred_Q8[ i + ( entry & 1 ) * ( psNLSF_CB->order - 1 ) ];
+        ec_ix  [ i + 1 ] = silk_SMULBB( silk_RSHIFT( entry, 5 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 );
+        pred_Q8[ i + 1 ] = psNLSF_CB->pred_Q8[ i + ( silk_RSHIFT( entry, 4 ) & 1 ) * ( psNLSF_CB->order - 1 ) + 1 ];
+    }
+}
+
diff --git a/drivers/opus/silk/NSQ.c b/drivers/opus/silk/NSQ.c
new file mode 100644
index 0000000000..a08e34e893
--- /dev/null
+++ b/drivers/opus/silk/NSQ.c
@@ -0,0 +1,446 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE void silk_nsq_scale_states(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I                                    */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+);
+
+static OPUS_INLINE void silk_noise_shape_quantizer(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            Lambda_Q10,             /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
+    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+);
+
+void silk_NSQ(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            k, lag, start_idx, LSF_interpolation_flag;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    SAVE_STACK;
+
+    NSQ->rand_seed = psIndices->Seed;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+
+                NSQ->rewhite_flag = 1;
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+            }
+        }
+
+        silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
+
+        silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+            AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
+            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Update lagPrev for next frame */
+    NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech and noise shaping signals */
+    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/***********************************/
+/* silk_noise_shape_quantizer  */
+/***********************************/
+static OPUS_INLINE void silk_noise_shape_quantizer(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            Lambda_Q10,             /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
+    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+)
+{
+    opus_int     i, j;
+    opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
+    opus_int32   n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
+    opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    /* Set up short term AR state */
+    psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
+
+    for( i = 0; i < length; i++ ) {
+        /* Generate dither */
+        NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
+
+        /* Short-term prediction */
+        silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+        LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[  0 ], a_Q12[ 0 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+        if( predictLPCOrder == 16 ) {
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
+        }
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q13 = 2;
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[  0 ], b_Q14[ 0 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
+            LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+            pred_lag_ptr++;
+        } else {
+            LTP_pred_Q13 = 0;
+        }
+
+        /* Noise shape feedback */
+        silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+        tmp2 = psLPC_Q14[ 0 ];
+        tmp1 = NSQ->sAR2_Q14[ 0 ];
+        NSQ->sAR2_Q14[ 0 ] = tmp2;
+        n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
+        for( j = 2; j < shapingLPCOrder; j += 2 ) {
+            tmp2 = NSQ->sAR2_Q14[ j - 1 ];
+            NSQ->sAR2_Q14[ j - 1 ] = tmp1;
+            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
+            tmp1 = NSQ->sAR2_Q14[ j + 0 ];
+            NSQ->sAR2_Q14[ j + 0 ] = tmp2;
+            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
+        }
+        NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+        n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 );                                /* Q11 -> Q12 */
+        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
+
+        n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
+        n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 );
+
+        silk_assert( lag > 0 || signalType != TYPE_VOICED );
+
+        /* Combine prediction and noise shaping signals */
+        tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 );        /* Q12 */
+        tmp1 = silk_SUB32( tmp1, n_LF_Q12 );                                    /* Q12 */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 );
+            shp_lag_ptr++;
+
+            tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 );                       /* Q13 */
+            tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 );                          /* Q13 */
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 3 );                                /* Q10 */
+        } else {
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 2 );                                /* Q10 */
+        }
+
+        r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 );                              /* residual error Q10 */
+
+        /* Flip sign depending on dither */
+        if ( NSQ->rand_seed < 0 ) {
+           r_Q10 = -r_Q10;
+        }
+        r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+        /* Find two quantization level candidates and measure their rate-distortion */
+        q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+        q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+        if( q1_Q0 > 0 ) {
+            q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+            q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+            q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+            rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+        } else if( q1_Q0 == 0 ) {
+            q1_Q10  = offset_Q10;
+            q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+            rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+        } else if( q1_Q0 == -1 ) {
+            q2_Q10  = offset_Q10;
+            q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+            rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
+        } else {            /* Q1_Q0 < -1 */
+            q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+            q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+            q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+            rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+            rd2_Q20 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+        }
+        rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
+        rd1_Q20 = silk_SMLABB( rd1_Q20, rr_Q10, rr_Q10 );
+        rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
+        rd2_Q20 = silk_SMLABB( rd2_Q20, rr_Q10, rr_Q10 );
+
+        if( rd2_Q20 < rd1_Q20 ) {
+            q1_Q10 = q2_Q10;
+        }
+
+        pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 );
+
+        /* Excitation */
+        exc_Q14 = silk_LSHIFT( q1_Q10, 4 );
+        if ( NSQ->rand_seed < 0 ) {
+           exc_Q14 = -exc_Q14;
+        }
+
+        /* Add predictions */
+        LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 );
+        xq_Q14      = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 );
+
+        /* Scale XQ back to normal level before saving */
+        xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( xq_Q14, Gain_Q10 ), 8 ) );
+
+        /* Update states */
+        psLPC_Q14++;
+        *psLPC_Q14 = xq_Q14;
+        sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 );
+        NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14;
+
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 );
+        sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 );
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Make dither dependent on quantized signal */
+        NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] );
+    }
+
+    /* Update LPC synth buffer */
+    silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+}
+
+static OPUS_INLINE void silk_nsq_scale_states(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I                                    */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+)
+{
+    opus_int   i, lag;
+    opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+
+    lag          = pitchL[ subfr ];
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Calculate gain adjustment factor */
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    } else {
+        gain_adj_Q16 = (opus_int32)1 << 16;
+    }
+
+    /* Scale input */
+    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+    for( i = 0; i < psEncC->subfr_length; i++ ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+    }
+
+    /* Save inverse gain */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+        }
+    }
+
+    /* Adjust for changing gain */
+    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+        /* Scale long-term shaping state */
+        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+            NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+        }
+
+        /* Scale long-term prediction state */
+        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+                sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+            }
+        }
+
+        NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 );
+
+        /* Scale short-term prediction and shaping states */
+        for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+            NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] );
+        }
+        for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+            NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] );
+        }
+    }
+}
diff --git a/drivers/opus/silk/NSQ_del_dec.c b/drivers/opus/silk/NSQ_del_dec.c
new file mode 100644
index 0000000000..8ac6311b11
--- /dev/null
+++ b/drivers/opus/silk/NSQ_del_dec.c
@@ -0,0 +1,719 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+typedef struct {
+    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
+    opus_int32 RandState[ DECISION_DELAY ];
+    opus_int32 Q_Q10[     DECISION_DELAY ];
+    opus_int32 Xq_Q14[    DECISION_DELAY ];
+    opus_int32 Pred_Q15[  DECISION_DELAY ];
+    opus_int32 Shape_Q14[ DECISION_DELAY ];
+    opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32 LF_AR_Q14;
+    opus_int32 Seed;
+    opus_int32 SeedInit;
+    opus_int32 RD_Q10;
+} NSQ_del_dec_struct;
+
+typedef struct {
+    opus_int32 Q_Q10;
+    opus_int32 RD_Q10;
+    opus_int32 xq_Q14;
+    opus_int32 LF_AR_Q14;
+    opus_int32 sLTP_shp_Q14;
+    opus_int32 LPC_exc_Q14;
+} NSQ_sample_struct;
+
+typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+);
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+);
+
+void silk_NSQ_del_dec(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
+    opus_int            last_smple_idx, smpl_buf_idx, decisionDelay;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    opus_int32          RDmin_Q10, Gain_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    VARDECL( opus_int32, delayedGain_Q10 );
+    VARDECL( NSQ_del_dec_struct, psDelDec );
+    NSQ_del_dec_struct  *psDD;
+    SAVE_STACK;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    /* Initialize delayed decision states */
+    ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
+    silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
+    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
+        psDD                 = &psDelDec[ k ];
+        psDD->Seed           = ( k + psIndices->Seed ) & 3;
+        psDD->SeedInit       = psDD->Seed;
+        psDD->RD_Q10         = 0;
+        psDD->LF_AR_Q14      = NSQ->sLF_AR_shp_Q14;
+        psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
+        silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+        silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
+    }
+
+    offset_Q10   = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+    smpl_buf_idx = 0; /* index of oldest samples */
+
+    decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length );
+
+    /* For voiced frames limit the decision delay to lower than the pitch lag */
+    if( psIndices->signalType == TYPE_VOICED ) {
+        for( k = 0; k < psEncC->nb_subfr; k++ ) {
+            decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
+        }
+    } else {
+        if( lag > 0 ) {
+            decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
+        }
+    }
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    subfr = 0;
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
+        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                if( k == 2 ) {
+                    /* RESET DELAYED DECISIONS */
+                    /* Find winner */
+                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+                    Winner_ind = 0;
+                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
+                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
+                            Winner_ind = i;
+                        }
+                    }
+                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
+                        if( i != Winner_ind ) {
+                            psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 );
+                            silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
+                        }
+                    }
+
+                    /* Copy final part of signals from winner state to output and long-term filter states */
+                    psDD = &psDelDec[ Winner_ind ];
+                    last_smple_idx = smpl_buf_idx + decisionDelay;
+                    for( i = 0; i < decisionDelay; i++ ) {
+                        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+                        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+                        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
+                        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+                    }
+
+                    subfr = 0;
+                }
+
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+                NSQ->rewhite_flag = 1;
+            }
+        }
+
+        silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
+            psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
+
+        silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
+            delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
+            Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
+            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Find winner */
+    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
+    Winner_ind = 0;
+    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
+        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
+            RDmin_Q10 = psDelDec[ k ].RD_Q10;
+            Winner_ind = k;
+        }
+    }
+
+    /* Copy final part of signals from winner state to output and long-term filter states */
+    psDD = &psDelDec[ Winner_ind ];
+    psIndices->Seed = psDD->SeedInit;
+    last_smple_idx = smpl_buf_idx + decisionDelay;
+    Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
+    for( i = 0; i < decisionDelay; i++ ) {
+        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
+        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
+        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
+    }
+    silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );
+
+    /* Update states */
+    NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
+    NSQ->lagPrev        = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech signal */
+    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
+    opus_int            signalType,             /* I    Signal type                         */
+    const opus_int32    x_Q10[],                /* I                                        */
+    opus_int8           pulses[],               /* O                                        */
+    opus_int16          xq[],                   /* O                                        */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
+    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
+    opus_int            lag,                    /* I    Pitch lag                           */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
+    opus_int32          LF_shp_Q14,             /* I                                        */
+    opus_int32          Gain_Q16,               /* I                                        */
+    opus_int            Lambda_Q10,             /* I                                        */
+    opus_int            offset_Q10,             /* I                                        */
+    opus_int            length,                 /* I    Input length                        */
+    opus_int            subfr,                  /* I    Subframe number                     */
+    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
+    opus_int            warping_Q16,            /* I                                        */
+    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
+    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
+    opus_int            decisionDelay           /* I                                        */
+)
+{
+    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
+    opus_int32   Winner_rand_state;
+    opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
+    opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
+    opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+    VARDECL( NSQ_sample_pair, psSampleState );
+    NSQ_del_dec_struct *psDD;
+    NSQ_sample_struct  *psSS;
+    SAVE_STACK;
+
+    silk_assert( nStatesDelayedDecision > 0 );
+    ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    for( i = 0; i < length; i++ ) {
+        /* Perform common calculations used in all states */
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q14 = 2;
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[  0 ], b_Q14[ 0 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
+            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
+            LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                          /* Q13 -> Q14 */
+            pred_lag_ptr++;
+        } else {
+            LTP_pred_Q14 = 0;
+        }
+
+        /* Long-term shaping */
+        if( lag > 0 ) {
+            /* Symmetric, packed FIR coefficients */
+            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
+            n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */
+            shp_lag_ptr++;
+        } else {
+            n_LTP_Q14 = 0;
+        }
+
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            /* Delayed decision state */
+            psDD = &psDelDec[ k ];
+
+            /* Sample state */
+            psSS = psSampleState[ k ];
+
+            /* Generate dither */
+            psDD->Seed = silk_RAND( psDD->Seed );
+
+            /* Pointer used in short term prediction and shaping */
+            psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
+            /* Short-term prediction */
+            silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[  0 ], a_Q12[ 0 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
+            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
+            if( predictLPCOrder == 16 ) {
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
+                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
+            }
+            LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
+
+            /* Noise shape feedback */
+            silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
+            /* Output of lowpass section */
+            tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
+            psDD->sAR2_Q14[ 0 ] = tmp2;
+            n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
+            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
+            /* Loop over allpass sections */
+            for( j = 2; j < shapingLPCOrder; j += 2 ) {
+                /* Output of allpass section */
+                tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
+                psDD->sAR2_Q14[ j - 1 ] = tmp1;
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
+                /* Output of allpass section */
+                tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
+                psDD->sAR2_Q14[ j + 0 ] = tmp2;
+                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
+            }
+            psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+
+            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );                                      /* Q11 -> Q12 */
+            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );              /* Q12 */
+            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );                                      /* Q12 -> Q14 */
+
+            n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 );     /* Q12 */
+            n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );            /* Q12 */
+            n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );                                      /* Q12 -> Q14 */
+
+            /* Input minus prediction plus noise feedback                       */
+            /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
+            tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );                                    /* Q14 */
+            tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );                               /* Q13 */
+            tmp1 = silk_SUB32( tmp2, tmp1 );                                            /* Q13 */
+            tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );                                        /* Q10 */
+
+            r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                                     /* residual error Q10 */
+
+            /* Flip sign depending on dither */
+            if ( psDD->Seed < 0 ) {
+                r_Q10 = -r_Q10;
+            }
+            r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+            /* Find two quantization level candidates and measure their rate-distortion */
+            q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+            q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+            if( q1_Q0 > 0 ) {
+                q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+            } else if( q1_Q0 == 0 ) {
+                q1_Q10  = offset_Q10;
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+            } else if( q1_Q0 == -1 ) {
+                q2_Q10  = offset_Q10;
+                q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
+            } else {            /* q1_Q0 < -1 */
+                q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
+                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
+                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+                rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+            }
+            rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
+            rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
+            rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
+            rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
+
+            if( rd1_Q10 < rd2_Q10 ) {
+                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                psSS[ 0 ].Q_Q10  = q1_Q10;
+                psSS[ 1 ].Q_Q10  = q2_Q10;
+            } else {
+                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+                psSS[ 0 ].Q_Q10  = q2_Q10;
+                psSS[ 1 ].Q_Q10  = q1_Q10;
+            }
+
+            /* Update states for best quantization */
+
+            /* Quantized excitation */
+            exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
+            if ( psDD->Seed < 0 ) {
+                exc_Q14 = -exc_Q14;
+            }
+
+            /* Add predictions */
+            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+            /* Update states */
+            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+            psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+            psSS[ 0 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+            psSS[ 0 ].LPC_exc_Q14  = LPC_exc_Q14;
+            psSS[ 0 ].xq_Q14       = xq_Q14;
+
+            /* Update states for second best quantization */
+
+            /* Quantized excitation */
+            exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
+            if ( psDD->Seed < 0 ) {
+                exc_Q14 = -exc_Q14;
+            }
+
+
+            /* Add predictions */
+            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+            /* Update states */
+            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
+            psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+            psSS[ 1 ].LF_AR_Q14    = sLF_AR_shp_Q14;
+            psSS[ 1 ].LPC_exc_Q14  = LPC_exc_Q14;
+            psSS[ 1 ].xq_Q14       = xq_Q14;
+        }
+
+        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
+        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */
+
+        /* Find winner */
+        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+        Winner_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                Winner_ind = k;
+            }
+        }
+
+        /* Increase RD values of expired states */
+        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
+                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
+                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
+                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
+            }
+        }
+
+        /* Find worst in first set and best in second set */
+        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
+        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
+        RDmax_ind = 0;
+        RDmin_ind = 0;
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
+            /* find worst in first set */
+            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
+                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
+                RDmax_ind = k;
+            }
+            /* find best in second set */
+            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
+                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
+                RDmin_ind = k;
+            }
+        }
+
+        /* Replace a state if best from second set outperforms worst in first set */
+        if( RDmin_Q10 < RDmax_Q10 ) {
+            silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
+                         ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
+            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
+        }
+
+        /* Write samples from winner to output and long-term filter states */
+        psDD = &psDelDec[ Winner_ind ];
+        if( subfr > 0 || i >= decisionDelay ) {
+            pulses[  i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+            xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+                silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
+            NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
+            sLTP_Q15[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q15[  last_smple_idx ];
+        }
+        NSQ->sLTP_shp_buf_idx++;
+        NSQ->sLTP_buf_idx++;
+
+        /* Update states */
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            psDD                                     = &psDelDec[ k ];
+            psSS                                     = &psSampleState[ k ][ 0 ];
+            psDD->LF_AR_Q14                          = psSS->LF_AR_Q14;
+            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
+            psDD->Xq_Q14[    *smpl_buf_idx ]         = psSS->xq_Q14;
+            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
+            psDD->Pred_Q15[  *smpl_buf_idx ]         = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
+            psDD->Shape_Q14[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q14;
+            psDD->Seed                               = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
+            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
+            psDD->RD_Q10                             = psSS->RD_Q10;
+        }
+        delayedGain_Q10[     *smpl_buf_idx ]         = Gain_Q10;
+    }
+    /* Update LPC states */
+    for( k = 0; k < nStatesDelayedDecision; k++ ) {
+        psDD = &psDelDec[ k ];
+        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+    }
+    RESTORE_STACK;
+}
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states(
+    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
+    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
+    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
+    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
+    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
+    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
+    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
+    opus_int            subfr,                      /* I    Subframe number                     */
+    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
+    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
+    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
+    const opus_int      signal_type,                /* I    Signal type                         */
+    const opus_int      decisionDelay               /* I    Decision delay                      */
+)
+{
+    opus_int            i, k, lag;
+    opus_int32          gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
+    NSQ_del_dec_struct  *psDD;
+
+    lag          = pitchL[ subfr ];
+    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
+    silk_assert( inv_gain_Q31 != 0 );
+
+    /* Calculate gain adjustment factor */
+    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
+        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
+    } else {
+        gain_adj_Q16 = (opus_int32)1 << 16;
+    }
+
+    /* Scale input */
+    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
+    for( i = 0; i < psEncC->subfr_length; i++ ) {
+        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
+    }
+
+    /* Save inverse gain */
+    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
+
+    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
+    if( NSQ->rewhite_flag ) {
+        if( subfr == 0 ) {
+            /* Do LTP downscaling */
+            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
+        }
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
+            silk_assert( i < MAX_FRAME_LENGTH );
+            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
+        }
+    }
+
+    /* Adjust for changing gain */
+    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+        /* Scale long-term shaping state */
+        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
+            NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
+        }
+
+        /* Scale long-term prediction state */
+        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
+            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) {
+                sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
+            }
+        }
+
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
+            psDD = &psDelDec[ k ];
+
+            /* Scale scalar states */
+            psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
+
+            /* Scale short-term prediction and shaping states */
+            for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
+                psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] );
+            }
+            for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
+                psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] );
+            }
+            for( i = 0; i < DECISION_DELAY; i++ ) {
+                psDD->Pred_Q15[  i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[  i ] );
+                psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
+            }
+        }
+    }
+}
diff --git a/drivers/opus/silk/PLC.c b/drivers/opus/silk/PLC.c
new file mode 100644
index 0000000000..9fc11adda9
--- /dev/null
+++ b/drivers/opus/silk/PLC.c
@@ -0,0 +1,423 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+#include "PLC.h"
+
+#define NB_ATT 2
+static const opus_int16 HARM_ATT_Q15[NB_ATT]              = { 32440, 31130 }; /* 0.99, 0.95 */
+static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT]  = { 31130, 26214 }; /* 0.95, 0.8 */
+static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */
+
+static OPUS_INLINE void silk_PLC_update(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
+);
+
+static OPUS_INLINE void silk_PLC_conceal(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[]             /* O LPC residual signal    */
+);
+
+
+void silk_PLC_Reset(
+    silk_decoder_state                  *psDec              /* I/O Decoder state        */
+)
+{
+    psDec->sPLC.pitchL_Q8 = silk_LSHIFT( psDec->frame_length, 8 - 1 );
+    psDec->sPLC.prevGain_Q16[ 0 ] = SILK_FIX_CONST( 1, 16 );
+    psDec->sPLC.prevGain_Q16[ 1 ] = SILK_FIX_CONST( 1, 16 );
+    psDec->sPLC.subfr_length = 20;
+    psDec->sPLC.nb_subfr = 2;
+}
+
+void silk_PLC(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[],            /* I/O  signal              */
+    opus_int                            lost                /* I Loss flag              */
+)
+{
+    /* PLC control function */
+    if( psDec->fs_kHz != psDec->sPLC.fs_kHz ) {
+        silk_PLC_Reset( psDec );
+        psDec->sPLC.fs_kHz = psDec->fs_kHz;
+    }
+
+    if( lost ) {
+        /****************************/
+        /* Generate Signal          */
+        /****************************/
+        silk_PLC_conceal( psDec, psDecCtrl, frame );
+
+        psDec->lossCnt++;
+    } else {
+        /****************************/
+        /* Update state             */
+        /****************************/
+        silk_PLC_update( psDec, psDecCtrl );
+    }
+}
+
+/**************************************************/
+/* Update state of PLC                            */
+/**************************************************/
+static OPUS_INLINE void silk_PLC_update(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl          /* I/O Decoder control      */
+)
+{
+    opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14;
+    opus_int   i, j;
+    silk_PLC_struct *psPLC;
+
+    psPLC = &psDec->sPLC;
+
+    /* Update parameters used in case of packet loss */
+    psDec->prevSignalType = psDec->indices.signalType;
+    LTP_Gain_Q14 = 0;
+    if( psDec->indices.signalType == TYPE_VOICED ) {
+        /* Find the parameters for the last subframe which contains a pitch pulse */
+        for( j = 0; j * psDec->subfr_length < psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; j++ ) {
+            if( j == psDec->nb_subfr ) {
+                break;
+            }
+            temp_LTP_Gain_Q14 = 0;
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                temp_LTP_Gain_Q14 += psDecCtrl->LTPCoef_Q14[ ( psDec->nb_subfr - 1 - j ) * LTP_ORDER  + i ];
+            }
+            if( temp_LTP_Gain_Q14 > LTP_Gain_Q14 ) {
+                LTP_Gain_Q14 = temp_LTP_Gain_Q14;
+                silk_memcpy( psPLC->LTPCoef_Q14,
+                    &psDecCtrl->LTPCoef_Q14[ silk_SMULBB( psDec->nb_subfr - 1 - j, LTP_ORDER ) ],
+                    LTP_ORDER * sizeof( opus_int16 ) );
+
+                psPLC->pitchL_Q8 = silk_LSHIFT( psDecCtrl->pitchL[ psDec->nb_subfr - 1 - j ], 8 );
+            }
+        }
+
+        silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) );
+        psPLC->LTPCoef_Q14[ LTP_ORDER / 2 ] = LTP_Gain_Q14;
+
+        /* Limit LT coefs */
+        if( LTP_Gain_Q14 < V_PITCH_GAIN_START_MIN_Q14 ) {
+            opus_int   scale_Q10;
+            opus_int32 tmp;
+
+            tmp = silk_LSHIFT( V_PITCH_GAIN_START_MIN_Q14, 10 );
+            scale_Q10 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) );
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q10 ), 10 );
+            }
+        } else if( LTP_Gain_Q14 > V_PITCH_GAIN_START_MAX_Q14 ) {
+            opus_int   scale_Q14;
+            opus_int32 tmp;
+
+            tmp = silk_LSHIFT( V_PITCH_GAIN_START_MAX_Q14, 14 );
+            scale_Q14 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) );
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q14 ), 14 );
+            }
+        }
+    } else {
+        psPLC->pitchL_Q8 = silk_LSHIFT( silk_SMULBB( psDec->fs_kHz, 18 ), 8 );
+        silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ));
+    }
+
+    /* Save LPC coeficients */
+    silk_memcpy( psPLC->prevLPC_Q12, psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) );
+    psPLC->prevLTP_scale_Q14 = psDecCtrl->LTP_scale_Q14;
+
+    /* Save last two gains */
+    silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) );
+
+    psPLC->subfr_length = psDec->subfr_length;
+    psPLC->nb_subfr = psDec->nb_subfr;
+}
+
+static OPUS_INLINE void silk_PLC_conceal(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[]             /* O LPC residual signal    */
+)
+{
+    opus_int   i, j, k;
+    opus_int   lag, idx, sLTP_buf_idx, shift1, shift2;
+    opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30;
+    opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
+    opus_int32 LPC_pred_Q10, LTP_pred_Q12;
+    opus_int16 rand_scale_Q14;
+    opus_int16 *B_Q14, *exc_buf_ptr;
+    opus_int32 *sLPC_Q14_ptr;
+    VARDECL( opus_int16, exc_buf );
+    opus_int16 A_Q12[ MAX_LPC_ORDER ];
+    VARDECL( opus_int16, sLTP );
+    VARDECL( opus_int32, sLTP_Q14 );
+    silk_PLC_struct *psPLC = &psDec->sPLC;
+    opus_int32 prevGain_Q10[2];
+    SAVE_STACK;
+
+    ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+    ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+
+    prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
+    prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
+
+    if( psDec->first_frame_after_reset ) {
+       silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
+    }
+
+    /* Find random noise component */
+    /* Scale previous excitation signal */
+    exc_buf_ptr = exc_buf;
+    for( k = 0; k < 2; k++ ) {
+        for( i = 0; i < psPLC->subfr_length; i++ ) {
+            exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
+                silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
+        }
+        exc_buf_ptr += psPLC->subfr_length;
+    }
+    /* Find the subframe with lowest energy of the last two and use that as random noise generator */
+    silk_sum_sqr_shift( &energy1, &shift1, exc_buf,                         psPLC->subfr_length );
+    silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
+
+    if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
+        /* First sub-frame has lowest energy */
+        rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, ( psPLC->nb_subfr - 1 ) * psPLC->subfr_length - RAND_BUF_SIZE ) ];
+    } else {
+        /* Second sub-frame has lowest energy */
+        rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, psPLC->nb_subfr * psPLC->subfr_length - RAND_BUF_SIZE ) ];
+    }
+
+    /* Set up Gain to random noise component */
+    B_Q14          = psPLC->LTPCoef_Q14;
+    rand_scale_Q14 = psPLC->randScale_Q14;
+
+    /* Set up attenuation gains */
+    harm_Gain_Q15 = HARM_ATT_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+    if( psDec->prevSignalType == TYPE_VOICED ) {
+        rand_Gain_Q15 = PLC_RAND_ATTENUATE_V_Q15[  silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+    } else {
+        rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ];
+    }
+
+    /* LPC concealment. Apply BWE to previous LPC */
+    silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) );
+
+    /* Preload LPC coeficients to array on stack. Gives small performance gain */
+    silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
+
+    /* First Lost frame */
+    if( psDec->lossCnt == 0 ) {
+        rand_scale_Q14 = 1 << 14;
+
+        /* Reduce random noise Gain for voiced frames */
+        if( psDec->prevSignalType == TYPE_VOICED ) {
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                rand_scale_Q14 -= B_Q14[ i ];
+            }
+            rand_scale_Q14 = silk_max_16( 3277, rand_scale_Q14 ); /* 0.2 */
+            rand_scale_Q14 = (opus_int16)silk_RSHIFT( silk_SMULBB( rand_scale_Q14, psPLC->prevLTP_scale_Q14 ), 14 );
+        } else {
+            /* Reduce random noise for unvoiced frames with high LPC gain */
+            opus_int32 invGain_Q30, down_scale_Q30;
+
+            invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order );
+
+            down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 );
+            down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 );
+            down_scale_Q30 = silk_LSHIFT( down_scale_Q30, LOG2_INV_LPC_GAIN_HIGH_THRES );
+
+            rand_Gain_Q15 = silk_RSHIFT( silk_SMULWB( down_scale_Q30, rand_Gain_Q15 ), 14 );
+        }
+    }
+
+    rand_seed    = psPLC->rand_seed;
+    lag          = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
+    sLTP_buf_idx = psDec->ltp_mem_length;
+
+    /* Rewhiten LTP state */
+    idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
+    silk_assert( idx > 0 );
+    silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order );
+    /* Scale LTP state */
+    inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 );
+    inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 );
+    for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) {
+        sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] );
+    }
+
+    /***************************/
+    /* LTP synthesis filtering */
+    /***************************/
+    for( k = 0; k < psDec->nb_subfr; k++ ) {
+        /* Set up pointer */
+        pred_lag_ptr = &sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+        for( i = 0; i < psDec->subfr_length; i++ ) {
+            /* Unrolled loop */
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LTP_pred_Q12 = 2;
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[  0 ], B_Q14[ 0 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
+            LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
+            pred_lag_ptr++;
+
+            /* Generate LPC excitation */
+            rand_seed = silk_RAND( rand_seed );
+            idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK;
+            sLTP_Q14[ sLTP_buf_idx ] = silk_LSHIFT32( silk_SMLAWB( LTP_pred_Q12, rand_ptr[ idx ], rand_scale_Q14 ), 2 );
+            sLTP_buf_idx++;
+        }
+
+        /* Gradually reduce LTP gain */
+        for( j = 0; j < LTP_ORDER; j++ ) {
+            B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 );
+        }
+        /* Gradually reduce excitation gain */
+        rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 );
+
+        /* Slowly increase pitch lag */
+        psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 );
+        psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) );
+        lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 );
+    }
+
+    /***************************/
+    /* LPC synthesis filtering */
+    /***************************/
+    sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ];
+
+    /* Copy LPC state */
+    silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+    silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */
+    for( i = 0; i < psDec->frame_length; i++ ) {
+        /* partly unrolled */
+        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+        LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+        for( j = 10; j < psDec->LPC_order; j++ ) {
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] );
+        }
+
+        /* Add prediction to LPC excitation */
+        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+
+        /* Scale with Gain */
+        frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
+    }
+
+    /* Save LPC state */
+    silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+    /**************************************/
+    /* Update states                      */
+    /**************************************/
+    psPLC->rand_seed     = rand_seed;
+    psPLC->randScale_Q14 = rand_scale_Q14;
+    for( i = 0; i < MAX_NB_SUBFR; i++ ) {
+        psDecCtrl->pitchL[ i ] = lag;
+    }
+    RESTORE_STACK;
+}
+
+/* Glues concealed frames with new good received frames */
+void silk_PLC_glue_frames(
+    silk_decoder_state                  *psDec,             /* I/O decoder state        */
+    opus_int16                          frame[],            /* I/O signal               */
+    opus_int                            length              /* I length of signal       */
+)
+{
+    opus_int   i, energy_shift;
+    opus_int32 energy;
+    silk_PLC_struct *psPLC;
+    psPLC = &psDec->sPLC;
+
+    if( psDec->lossCnt ) {
+        /* Calculate energy in concealed residual */
+        silk_sum_sqr_shift( &psPLC->conc_energy, &psPLC->conc_energy_shift, frame, length );
+
+        psPLC->last_frame_lost = 1;
+    } else {
+        if( psDec->sPLC.last_frame_lost ) {
+            /* Calculate residual in decoded signal if last frame was lost */
+            silk_sum_sqr_shift( &energy, &energy_shift, frame, length );
+
+            /* Normalize energies */
+            if( energy_shift > psPLC->conc_energy_shift ) {
+                psPLC->conc_energy = silk_RSHIFT( psPLC->conc_energy, energy_shift - psPLC->conc_energy_shift );
+            } else if( energy_shift < psPLC->conc_energy_shift ) {
+                energy = silk_RSHIFT( energy, psPLC->conc_energy_shift - energy_shift );
+            }
+
+            /* Fade in the energy difference */
+            if( energy > psPLC->conc_energy ) {
+                opus_int32 frac_Q24, LZ;
+                opus_int32 gain_Q16, slope_Q16;
+
+                LZ = silk_CLZ32( psPLC->conc_energy );
+                LZ = LZ - 1;
+                psPLC->conc_energy = silk_LSHIFT( psPLC->conc_energy, LZ );
+                energy = silk_RSHIFT( energy, silk_max_32( 24 - LZ, 0 ) );
+
+                frac_Q24 = silk_DIV32( psPLC->conc_energy, silk_max( energy, 1 ) );
+
+                gain_Q16 = silk_LSHIFT( silk_SQRT_APPROX( frac_Q24 ), 4 );
+                slope_Q16 = silk_DIV32_16( ( (opus_int32)1 << 16 ) - gain_Q16, length );
+                /* Make slope 4x steeper to avoid missing onsets after DTX */
+                slope_Q16 = silk_LSHIFT( slope_Q16, 2 );
+
+                for( i = 0; i < length; i++ ) {
+                    frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] );
+                    gain_Q16 += slope_Q16;
+                    if( gain_Q16 > (opus_int32)1 << 16 ) {
+                        break;
+                    }
+                }
+            }
+        }
+        psPLC->last_frame_lost = 0;
+    }
+}
diff --git a/drivers/opus/silk/PLC.h b/drivers/opus/silk/PLC.h
new file mode 100644
index 0000000000..f531cda950
--- /dev/null
+++ b/drivers/opus/silk/PLC.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_PLC_H
+#define SILK_PLC_H
+
+#include "silk_main.h"
+
+#define BWE_COEF                        0.99
+#define V_PITCH_GAIN_START_MIN_Q14      11469               /* 0.7 in Q14               */
+#define V_PITCH_GAIN_START_MAX_Q14      15565               /* 0.95 in Q14              */
+#define MAX_PITCH_LAG_MS                18
+#define RAND_BUF_SIZE                   128
+#define RAND_BUF_MASK                   ( RAND_BUF_SIZE - 1 )
+#define LOG2_INV_LPC_GAIN_HIGH_THRES    3                   /* 2^3 = 8 dB LPC gain      */
+#define LOG2_INV_LPC_GAIN_LOW_THRES     8                   /* 2^8 = 24 dB LPC gain     */
+#define PITCH_DRIFT_FAC_Q16             655                 /* 0.01 in Q16              */
+
+void silk_PLC_Reset(
+    silk_decoder_state                  *psDec              /* I/O Decoder state        */
+);
+
+void silk_PLC(
+    silk_decoder_state                  *psDec,             /* I/O Decoder state        */
+    silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
+    opus_int16                          frame[],            /* I/O  signal              */
+    opus_int                            lost                /* I Loss flag              */
+);
+
+void silk_PLC_glue_frames(
+    silk_decoder_state                  *psDec,             /* I/O decoder state        */
+    opus_int16                          frame[],            /* I/O signal               */
+    opus_int                            length              /* I length of signal       */
+);
+
+#endif
+
diff --git a/drivers/opus/silk/SigProc_FIX.h b/drivers/opus/silk/SigProc_FIX.h
new file mode 100644
index 0000000000..1b58057910
--- /dev/null
+++ b/drivers/opus/silk/SigProc_FIX.h
@@ -0,0 +1,594 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_H
+#define SILK_SIGPROC_FIX_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+/*#define silk_MACRO_COUNT */          /* Used to enable WMOPS counting */
+
+#define SILK_MAX_ORDER_LPC            16            /* max order of the LPC analysis in schur() and k2a() */
+
+#include <string.h>                                 /* for memset(), memcpy(), memmove() */
+#include "typedef.h"
+#include "resampler_structs.h"
+#include "macros.h"
+
+
+/********************************************************************/
+/*                    SIGNAL PROCESSING FUNCTIONS                   */
+/********************************************************************/
+
+/*!
+ * Initialize/reset the resampler state for a given pair of input/output sampling rates
+*/
+opus_int silk_resampler_init(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int32                  Fs_Hz_in,           /* I    Input sampling rate (Hz)                                    */
+    opus_int32                  Fs_Hz_out,          /* I    Output sampling rate (Hz)                                   */
+    opus_int                    forEnc              /* I    If 1: encoder; if 0: decoder                                */
+);
+
+/*!
+ * Resampler: convert from one sampling rate to another
+ */
+opus_int silk_resampler(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int16                  out[],              /* O    Output signal                                               */
+    const opus_int16            in[],               /* I    Input signal                                                */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+);
+
+/*!
+* Downsample 2x, mediocre quality
+*/
+void silk_resampler_down2(
+    opus_int32                  *S,                 /* I/O  State vector [ 2 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ len ]                                       */
+    const opus_int16            *in,                /* I    Input signal [ floor(len/2) ]                               */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+);
+
+/*!
+ * Downsample by a factor 2/3, low quality
+*/
+void silk_resampler_down2_3(
+    opus_int32                  *S,                 /* I/O  State vector [ 6 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ floor(2*inLen/3) ]                          */
+    const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+);
+
+/*!
+ * second order ARMA filter;
+ * slower than biquad() but uses more precise coefficients
+ * can handle (slowly) varying coefficients
+ */
+void silk_biquad_alt(
+    const opus_int16            *in,                /* I     input signal                                               */
+    const opus_int32            *B_Q28,             /* I     MA coefficients [3]                                        */
+    const opus_int32            *A_Q28,             /* I     AR coefficients [2]                                        */
+    opus_int32                  *S,                 /* I/O   State vector [2]                                           */
+    opus_int16                  *out,               /* O     output signal                                              */
+    const opus_int32            len,                /* I     signal length (must be even)                               */
+    opus_int                    stride              /* I     Operate on interleaved signal if > 1                       */
+);
+
+/* Variable order MA prediction error filter. */
+void silk_LPC_analysis_filter(
+    opus_int16                  *out,               /* O    Output signal                                               */
+    const opus_int16            *in,                /* I    Input signal                                                */
+    const opus_int16            *B,                 /* I    MA prediction coefficients, Q12 [order]                     */
+    const opus_int32            len,                /* I    Signal length                                               */
+    const opus_int32            d                   /* I    Filter order                                                */
+);
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander(
+    opus_int16                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor (typically in the range 0 to 1)                */
+);
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander_32(
+    opus_int32                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor in Q16                                         */
+);
+
+/* Compute inverse of LPC prediction gain, and                           */
+/* test if LPC coefficients are stable (all poles within unit circle)    */
+opus_int32 silk_LPC_inverse_pred_gain(              /* O   Returns inverse prediction gain in energy domain, Q30        */
+    const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
+    const opus_int              order               /* I   Prediction order                                             */
+);
+
+/* For input in Q24 domain */
+opus_int32 silk_LPC_inverse_pred_gain_Q24(          /* O    Returns inverse prediction gain in energy domain, Q30       */
+    const opus_int32            *A_Q24,             /* I    Prediction coefficients [order]                             */
+    const opus_int              order               /* I    Prediction order                                            */
+);
+
+/* Split signal in two decimated bands using first-order allpass filters */
+void silk_ana_filt_bank_1(
+    const opus_int16            *in,                /* I    Input signal [N]                                            */
+    opus_int32                  *S,                 /* I/O  State vector [2]                                            */
+    opus_int16                  *outL,              /* O    Low band [N/2]                                              */
+    opus_int16                  *outH,              /* O    High band [N/2]                                             */
+    const opus_int32            N                   /* I    Number of input samples                                     */
+);
+
+/********************************************************************/
+/*                        SCALAR FUNCTIONS                          */
+/********************************************************************/
+
+/* Approximation of 128 * log2() (exact inverse of approx 2^() below) */
+/* Convert input to a log scale    */
+opus_int32 silk_lin2log(
+    const opus_int32            inLin               /* I  input in linear scale                                         */
+);
+
+/* Approximation of a sigmoid function */
+opus_int silk_sigm_Q15(
+    opus_int                    in_Q5               /* I                                                                */
+);
+
+/* Approximation of 2^() (exact inverse of approx log2() above) */
+/* Convert input to a linear scale */
+opus_int32 silk_log2lin(
+    const opus_int32            inLog_Q7            /* I  input on log scale                                            */
+);
+
+/* Compute number of bits to right shift the sum of squares of a vector    */
+/* of int16s to make it fit in an int32                                    */
+void silk_sum_sqr_shift(
+    opus_int32                  *energy,            /* O   Energy of x, after shifting to the right                     */
+    opus_int                    *shift,             /* O   Number of bits right shift applied to energy                 */
+    const opus_int16            *x,                 /* I   Input vector                                                 */
+    opus_int                    len                 /* I   Length of input vector                                       */
+);
+
+/* Calculates the reflection coefficients from the correlation sequence    */
+/* Faster than schur64(), but much less accurate.                          */
+/* uses SMLAWB(), requiring armv5E and higher.                             */
+opus_int32 silk_schur(                              /* O    Returns residual energy                                     */
+    opus_int16                  *rc_Q15,            /* O    reflection coefficients [order] Q15                         */
+    const opus_int32            *c,                 /* I    correlations [order+1]                                      */
+    const opus_int32            order               /* I    prediction order                                            */
+);
+
+/* Calculates the reflection coefficients from the correlation sequence    */
+/* Slower than schur(), but more accurate.                                 */
+/* Uses SMULL(), available on armv4                                        */
+opus_int32 silk_schur64(                            /* O    returns residual energy                                     */
+    opus_int32                  rc_Q16[],           /* O    Reflection coefficients [order] Q16                         */
+    const opus_int32            c[],                /* I    Correlations [order+1]                                      */
+    opus_int32                  order               /* I    Prediction order                                            */
+);
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int16            *rc_Q15,            /* I    Reflection coefficients [order] Q15                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+);
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a_Q16(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int32            *rc_Q16,            /* I    Reflection coefficients [order] Q16                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+);
+
+/* Apply sine window to signal vector.                              */
+/* Window types:                                                    */
+/*    1 -> sine window from 0 to pi/2                               */
+/*    2 -> sine window from pi/2 to pi                              */
+/* every other sample of window is linearly interpolated, for speed */
+void silk_apply_sine_window(
+    opus_int16                  px_win[],           /* O    Pointer to windowed signal                                  */
+    const opus_int16            px[],               /* I    Pointer to input signal                                     */
+    const opus_int              win_type,           /* I    Selects a window type                                       */
+    const opus_int              length              /* I    Window length, multiple of 4                                */
+);
+
+/* Compute autocorrelation */
+void silk_autocorr(
+    opus_int32                  *results,           /* O    Result (length correlationCount)                            */
+    opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
+    const opus_int16            *inputData,         /* I    Input data to correlate                                     */
+    const opus_int              inputDataSize,      /* I    Length of input                                             */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+void silk_decode_pitch(
+    opus_int16                  lagIndex,           /* I                                                                */
+    opus_int8                   contourIndex,       /* O                                                                */
+    opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
+    const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                                    */
+    const opus_int              nb_subfr            /* I    number of sub frames                                        */
+);
+
+opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
+    opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
+    opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
+    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
+    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
+/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
+void silk_A2NLSF(
+    opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
+    opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
+    const opus_int              d                   /* I    Filter order (must be even)                                 */
+);
+
+/* compute whitening filter coefficients from normalized line spectral frequencies */
+void silk_NLSF2A(
+    opus_int16                  *a_Q12,             /* O    monic whitening filter coefficients in Q12,  [ d ]          */
+    const opus_int16            *NLSF,              /* I    normalized line spectral frequencies in Q15, [ d ]          */
+    const opus_int              d                   /* I    filter order (should be even)                               */
+);
+
+void silk_insertion_sort_increasing(
+    opus_int32                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
+    opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
+    const opus_int              L,                  /* I     Vector length                                              */
+    const opus_int              K                   /* I     Number of correctly sorted positions                       */
+);
+
+void silk_insertion_sort_decreasing_int16(
+    opus_int16                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
+    opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
+    const opus_int              L,                  /* I     Vector length                                              */
+    const opus_int              K                   /* I     Number of correctly sorted positions                       */
+);
+
+void silk_insertion_sort_increasing_all_values_int16(
+     opus_int16                 *a,                 /* I/O   Unsorted / Sorted vector                                   */
+     const opus_int             L                   /* I     Vector length                                              */
+);
+
+/* NLSF stabilizer, for a single input data vector */
+void silk_NLSF_stabilize(
+          opus_int16            *NLSF_Q15,          /* I/O   Unstable/stabilized normalized LSF vector in Q15 [L]       */
+    const opus_int16            *NDeltaMin_Q15,     /* I     Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]   */
+    const opus_int              L                   /* I     Number of NLSF parameters in the input vector              */
+);
+
+/* Laroia low complexity NLSF weights */
+void silk_NLSF_VQ_weights_laroia(
+    opus_int16                  *pNLSFW_Q_OUT,      /* O     Pointer to input vector weights [D]                        */
+    const opus_int16            *pNLSF_Q15,         /* I     Pointer to input vector         [D]                        */
+    const opus_int              D                   /* I     Input vector dimension (even)                              */
+);
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+/* Copy and multiply a vector by a constant */
+void silk_scale_copy_vector16(
+    opus_int16                  *data_out,
+    const opus_int16            *data_in,
+    opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
+    const opus_int              dataSize            /* I    Length                                                      */
+);
+
+/* Some for the LTP related function requires Q26 to work.*/
+void silk_scale_vector32_Q26_lshift_18(
+    opus_int32                  *data1,             /* I/O  Q0/Q18                                                      */
+    opus_int32                  gain_Q26,           /* I    Q26                                                         */
+    opus_int                    dataSize            /* I    length                                                      */
+);
+
+/********************************************************************/
+/*                        INLINE ARM MATH                           */
+/********************************************************************/
+
+/*    return sum( inVec1[i] * inVec2[i] ) */
+opus_int32 silk_inner_prod_aligned(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+opus_int32 silk_inner_prod_aligned_scale(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              scale,              /*    I number of bits to shift                                     */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+opus_int64 silk_inner_prod16_aligned_64(
+    const opus_int16            *inVec1,            /*    I input vector 1                                              */
+    const opus_int16            *inVec2,            /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+/********************************************************************/
+/*                                MACROS                            */
+/********************************************************************/
+
+/* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
+   left. Output is 32bit int.
+   Note: contemporary compilers recognize the C expression below and
+   compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */
+static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
+{
+    opus_uint32 x = (opus_uint32) a32;
+    opus_uint32 r = (opus_uint32) rot;
+    opus_uint32 m = (opus_uint32) -rot;
+    if( rot == 0 ) {
+        return a32;
+    } else if( rot < 0 ) {
+        return (opus_int32) ((x << m) | (x >> (32 - m)));
+    } else {
+        return (opus_int32) ((x << (32 - r)) | (x >> r));
+    }
+}
+
+/* Allocate opus_int16 aligned to 4-byte memory address */
+#if EMBEDDED_ARM
+#define silk_DWORD_ALIGN __attribute__((aligned(4)))
+#else
+#define silk_DWORD_ALIGN
+#endif
+
+/* Useful Macros that can be adjusted to other platforms */
+#define silk_memcpy(dest, src, size)        memcpy((dest), (src), (size))
+#define silk_memset(dest, src, size)        memset((dest), (src), (size))
+#define silk_memmove(dest, src, size)       memmove((dest), (src), (size))
+
+/* Fixed point macros */
+
+/* (a32 * b32) output have to be 32bit int */
+#define silk_MUL(a32, b32)                  ((a32) * (b32))
+
+/* (a32 * b32) output have to be 32bit uint */
+#define silk_MUL_uint(a32, b32)             silk_MUL(a32, b32)
+
+/* a32 + (b32 * c32) output have to be 32bit int */
+#define silk_MLA(a32, b32, c32)             silk_ADD32((a32),((b32) * (c32)))
+
+/* a32 + (b32 * c32) output have to be 32bit uint */
+#define silk_MLA_uint(a32, b32, c32)        silk_MLA(a32, b32, c32)
+
+/* ((a32 >> 16)  * (b32 >> 16)) output have to be 32bit int */
+#define silk_SMULTT(a32, b32)               (((a32) >> 16) * ((b32) >> 16))
+
+/* a32 + ((a32 >> 16)  * (b32 >> 16)) output have to be 32bit int */
+#define silk_SMLATT(a32, b32, c32)          silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16))
+
+#define silk_SMLALBB(a64, b16, c16)         silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16)))
+
+/* (a32 * b32) */
+#define silk_SMULL(a32, b32)                ((opus_int64)(a32) * /*(opus_int64)*/(b32))
+
+/* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+   (just standard two's complement implementation-specific behaviour) */
+#define silk_ADD32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
+/* Subtractss two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+   (just standard two's complement implementation-specific behaviour) */
+#define silk_SUB32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
+
+/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */
+#define silk_MLA_ovflw(a32, b32, c32)       silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
+#define silk_SMLABB_ovflw(a32, b32, c32)    (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
+
+#define silk_DIV32_16(a32, b16)             ((opus_int32)((a32) / (b16)))
+#define silk_DIV32(a32, b32)                ((opus_int32)((a32) / (b32)))
+
+/* These macros enables checking for overflow in silk_API_Debug.h*/
+#define silk_ADD16(a, b)                    ((a) + (b))
+#define silk_ADD32(a, b)                    ((a) + (b))
+#define silk_ADD64(a, b)                    ((a) + (b))
+
+#define silk_SUB16(a, b)                    ((a) - (b))
+#define silk_SUB32(a, b)                    ((a) - (b))
+#define silk_SUB64(a, b)                    ((a) - (b))
+
+#define silk_SAT8(a)                        ((a) > silk_int8_MAX ? silk_int8_MAX  :       \
+                                            ((a) < silk_int8_MIN ? silk_int8_MIN  : (a)))
+#define silk_SAT16(a)                       ((a) > silk_int16_MAX ? silk_int16_MAX :      \
+                                            ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
+#define silk_SAT32(a)                       ((a) > silk_int32_MAX ? silk_int32_MAX :      \
+                                            ((a) < silk_int32_MIN ? silk_int32_MIN : (a)))
+
+#define silk_CHECK_FIT8(a)                  (a)
+#define silk_CHECK_FIT16(a)                 (a)
+#define silk_CHECK_FIT32(a)                 (a)
+
+#define silk_ADD_SAT16(a, b)                (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) )
+#define silk_ADD_SAT64(a, b)                ((((a) + (b)) & 0x8000000000000000LL) == 0 ?                            \
+                                            ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \
+                                            ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) )
+
+#define silk_SUB_SAT16(a, b)                (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) )
+#define silk_SUB_SAT64(a, b)                ((((a)-(b)) & 0x8000000000000000LL) == 0 ?                                               \
+                                            (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \
+                                            ((((a)^0x8000000000000000LL) & (b)  & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) )
+
+/* Saturation for positive input values */
+#define silk_POS_SAT32(a)                   ((a) > silk_int32_MAX ? silk_int32_MAX : (a))
+
+/* Add with saturation for positive input values */
+#define silk_ADD_POS_SAT8(a, b)             ((((a)+(b)) & 0x80)                 ? silk_int8_MAX  : ((a)+(b)))
+#define silk_ADD_POS_SAT16(a, b)            ((((a)+(b)) & 0x8000)               ? silk_int16_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT32(a, b)            ((((a)+(b)) & 0x80000000)           ? silk_int32_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT64(a, b)            ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)))
+
+#define silk_LSHIFT8(a, shift)              ((opus_int8)((opus_uint8)(a)<<(shift)))         /* shift >= 0, shift < 8  */
+#define silk_LSHIFT16(a, shift)             ((opus_int16)((opus_uint16)(a)<<(shift)))       /* shift >= 0, shift < 16 */
+#define silk_LSHIFT32(a, shift)             ((opus_int32)((opus_uint32)(a)<<(shift)))       /* shift >= 0, shift < 32 */
+#define silk_LSHIFT64(a, shift)             ((opus_int64)((opus_uint64)(a)<<(shift)))       /* shift >= 0, shift < 64 */
+#define silk_LSHIFT(a, shift)               silk_LSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
+
+#define silk_RSHIFT8(a, shift)              ((a)>>(shift))                                  /* shift >= 0, shift < 8  */
+#define silk_RSHIFT16(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 16 */
+#define silk_RSHIFT32(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 32 */
+#define silk_RSHIFT64(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 64 */
+#define silk_RSHIFT(a, shift)               silk_RSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
+
+/* saturates before shifting */
+#define silk_LSHIFT_SAT32(a, shift)         (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \
+                                                    silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
+
+#define silk_LSHIFT_ovflw(a, shift)         ((opus_int32)((opus_uint32)(a) << (shift)))     /* shift >= 0, allowed to overflow */
+#define silk_LSHIFT_uint(a, shift)          ((a) << (shift))                                /* shift >= 0 */
+#define silk_RSHIFT_uint(a, shift)          ((a) >> (shift))                                /* shift >= 0 */
+
+#define silk_ADD_LSHIFT(a, b, shift)        ((a) + silk_LSHIFT((b), (shift)))               /* shift >= 0 */
+#define silk_ADD_LSHIFT32(a, b, shift)      silk_ADD32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_ADD_LSHIFT_uint(a, b, shift)   ((a) + silk_LSHIFT_uint((b), (shift)))          /* shift >= 0 */
+#define silk_ADD_RSHIFT(a, b, shift)        ((a) + silk_RSHIFT((b), (shift)))               /* shift >= 0 */
+#define silk_ADD_RSHIFT32(a, b, shift)      silk_ADD32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_ADD_RSHIFT_uint(a, b, shift)   ((a) + silk_RSHIFT_uint((b), (shift)))          /* shift >= 0 */
+#define silk_SUB_LSHIFT32(a, b, shift)      silk_SUB32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_SUB_RSHIFT32(a, b, shift)      silk_SUB32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
+
+/* Requires that shift > 0 */
+#define silk_RSHIFT_ROUND(a, shift)         ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+#define silk_RSHIFT_ROUND64(a, shift)       ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+
+/* Number of rightshift required to fit the multiplication */
+#define silk_NSHIFT_MUL_32_32(a, b)         ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) )
+#define silk_NSHIFT_MUL_16_16(a, b)         ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) )
+
+
+#define silk_min(a, b)                      (((a) < (b)) ? (a) : (b))
+#define silk_max(a, b)                      (((a) > (b)) ? (a) : (b))
+
+/* Macro to convert floating-point constants to fixed-point */
+#define SILK_FIX_CONST( C, Q )              ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
+
+/* silk_min() versions with typecast in the function call */
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+
+/* silk_min() versions with typecast in the function call */
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+
+#define silk_LIMIT( a, limit1, limit2)      ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+                                                                 : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
+
+#define silk_LIMIT_int                      silk_LIMIT
+#define silk_LIMIT_16                       silk_LIMIT
+#define silk_LIMIT_32                       silk_LIMIT
+
+#define silk_abs(a)                         (((a) >  0)  ? (a) : -(a))            /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */
+#define silk_abs_int(a)                     (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1)))
+#define silk_abs_int32(a)                   (((a) ^ ((a) >> 31)) - ((a) >> 31))
+#define silk_abs_int64(a)                   (((a) >  0)  ? (a) : -(a))
+
+#define silk_sign(a)                        ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ))
+
+/* PSEUDO-RANDOM GENERATOR                                                          */
+/* Make sure to store the result as the seed for the next call (also in between     */
+/* frames), otherwise result won't be random at all. When only using some of the    */
+/* bits, take the most significant bits by right-shifting.                          */
+#define silk_RAND(seed)                     (silk_MLA_ovflw(907633515, (seed), 196314165))
+
+/*  Add some multiplication functions that can be easily mapped to ARM. */
+
+/*    silk_SMMUL: Signed top word multiply.
+          ARMv6        2 instruction cycles.
+          ARMv3M+      3 instruction cycles. use SMULL and ignore LSB registers.(except xM)*/
+/*#define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/
+/* the following seems faster on x86 */
+#define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
+
+#include "Inlines.h"
+#include "MacroCount.h"
+#include "MacroDebug.h"
+
+#ifdef OPUS_ARM_INLINE_ASM
+#include "arm/SigProc_FIX_armv4.h"
+#endif
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/SigProc_FIX_armv5e.h"
+#endif
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_SIGPROC_FIX_H */
diff --git a/drivers/opus/silk/VAD.c b/drivers/opus/silk/VAD.c
new file mode 100644
index 0000000000..3a5c566627
--- /dev/null
+++ b/drivers/opus/silk/VAD.c
@@ -0,0 +1,357 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/* Silk VAD noise level estimation */
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
+    const opus_int32             pX[ VAD_N_BANDS ], /* I    subband energies                            */
+    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
+);
+
+/**********************************/
+/* Initialization of the Silk VAD */
+/**********************************/
+opus_int silk_VAD_Init(                                         /* O    Return value, 0 if success                  */
+    silk_VAD_state              *psSilk_VAD                     /* I/O  Pointer to Silk VAD state                   */
+)
+{
+    opus_int b, ret = 0;
+
+    /* reset state memory */
+    silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) );
+
+    /* init noise levels */
+    /* Initialize array with approx pink noise levels (psd proportional to inverse of frequency) */
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        psSilk_VAD->NoiseLevelBias[ b ] = silk_max_32( silk_DIV32_16( VAD_NOISE_LEVELS_BIAS, b + 1 ), 1 );
+    }
+
+    /* Initialize state */
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        psSilk_VAD->NL[ b ]     = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ b ] );
+        psSilk_VAD->inv_NL[ b ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ b ] );
+    }
+    psSilk_VAD->counter = 15;
+
+    /* init smoothed energy-to-noise ratio*/
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        psSilk_VAD->NrgRatioSmth_Q8[ b ] = 100 * 256;       /* 100 * 256 --> 20 dB SNR */
+    }
+
+    return( ret );
+}
+
+/* Weighting factors for tilt measure */
+static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 };
+
+/***************************************/
+/* Get the speech activity level in Q8 */
+/***************************************/
+opus_int silk_VAD_GetSA_Q8(                                     /* O    Return value, 0 if success                  */
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    const opus_int16            pIn[]                           /* I    PCM input                                   */
+)
+{
+    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
+    opus_int   decimated_framelength1, decimated_framelength2;
+    opus_int   decimated_framelength;
+    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
+    opus_int32 sumSquared, smooth_coef_Q16;
+    opus_int16 HPstateTmp;
+    VARDECL( opus_int16, X );
+    opus_int32 Xnrg[ VAD_N_BANDS ];
+    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
+    opus_int32 speech_nrg, x_tmp;
+    opus_int   X_offset[ VAD_N_BANDS ];
+    opus_int   ret = 0;
+    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
+    SAVE_STACK;
+
+    /* Safety checks */
+    silk_assert( VAD_N_BANDS == 4 );
+    silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
+    silk_assert( psEncC->frame_length <= 512 );
+    silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
+
+    /***********************/
+    /* Filter and Decimate */
+    /***********************/
+    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
+    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
+    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
+    /* Decimate into 4 bands:
+       0       L      3L       L              3L                             5L
+               -      --       -              --                             --
+               8       8       2               4                              4
+
+       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
+
+       They're arranged to allow the minimal ( frame_length / 4 ) extra
+       scratch space during the downsampling process */
+    X_offset[ 0 ] = 0;
+    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
+    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
+    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
+    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
+
+    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
+    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],
+        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
+
+    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
+        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
+
+    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
+    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
+        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
+
+    /*********************************************/
+    /* HP filter on lowest band (differentiator) */
+    /*********************************************/
+    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
+    HPstateTmp = X[ decimated_framelength - 1 ];
+    for( i = decimated_framelength - 1; i > 0; i-- ) {
+        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
+        X[ i ]     -= X[ i - 1 ];
+    }
+    X[ 0 ] -= psSilk_VAD->HPstate;
+    psSilk_VAD->HPstate = HPstateTmp;
+
+    /*************************************/
+    /* Calculate the energy in each band */
+    /*************************************/
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Find the decimated framelength in the non-uniformly divided bands */
+        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
+
+        /* Split length into subframe lengths */
+        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
+        dec_subframe_offset = 0;
+
+        /* Compute energy per sub-frame */
+        /* initialize with summed energy of last subframe */
+        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
+        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
+            sumSquared = 0;
+            for( i = 0; i < dec_subframe_length; i++ ) {
+                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
+                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
+                x_tmp = silk_RSHIFT(
+                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
+                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
+
+                /* Safety check */
+                silk_assert( sumSquared >= 0 );
+            }
+
+            /* Add/saturate summed energy of current subframe */
+            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
+            } else {
+                /* Look-ahead subframe */
+                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
+            }
+
+            dec_subframe_offset += dec_subframe_length;
+        }
+        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;
+    }
+
+    /********************/
+    /* Noise estimation */
+    /********************/
+    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
+
+    /***********************************************/
+    /* Signal-plus-noise to noise ratio estimation */
+    /***********************************************/
+    sumSquared = 0;
+    input_tilt = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
+        if( speech_nrg > 0 ) {
+            /* Divide, with sufficient resolution */
+            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
+            } else {
+                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
+            }
+
+            /* Convert to log domain */
+            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
+
+            /* Sum-of-squares */
+            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */
+
+            /* Tilt measure */
+            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
+                /* Scale down SNR value for small subband speech energies */
+                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
+            }
+            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
+        } else {
+            NrgToNoiseRatio_Q8[ b ] = 256;
+        }
+    }
+
+    /* Mean-of-squares */
+    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
+
+    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
+    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
+
+    /*********************************/
+    /* Speech Probability Estimation */
+    /*********************************/
+    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
+
+    /**************************/
+    /* Frequency Tilt Measure */
+    /**************************/
+    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
+
+    /**************************************************/
+    /* Scale the sigmoid output based on power levels */
+    /**************************************************/
+    speech_nrg = 0;
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
+        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
+    }
+
+    /* Power scaling */
+    if( speech_nrg <= 0 ) {
+        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
+    } else if( speech_nrg < 32768 ) {
+        if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
+        } else {
+            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
+        }
+
+        /* square-root */
+        speech_nrg = silk_SQRT_APPROX( speech_nrg );
+        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
+    }
+
+    /* Copy the resulting speech activity in Q8 */
+    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
+
+    /***********************************/
+    /* Energy Level and SNR estimation */
+    /***********************************/
+    /* Smoothing coefficient */
+    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
+
+    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
+        smooth_coef_Q16 >>= 1;
+    }
+
+    for( b = 0; b < VAD_N_BANDS; b++ ) {
+        /* compute smoothed energy-to-noise ratio per band */
+        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
+            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
+
+        /* signal to noise ratio in dB per band */
+        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
+        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
+        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
+    }
+
+    RESTORE_STACK;
+    return( ret );
+}
+
+/**************************/
+/* Noise level estimation */
+/**************************/
+static OPUS_INLINE void silk_VAD_GetNoiseLevels(
+    const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
+    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
+)
+{
+    opus_int   k;
+    opus_int32 nl, nrg, inv_nrg;
+    opus_int   coef, min_coef;
+
+    /* Initially faster smoothing */
+    if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */
+        min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 );
+    } else {
+        min_coef = 0;
+    }
+
+    for( k = 0; k < VAD_N_BANDS; k++ ) {
+        /* Get old noise level estimate for current band */
+        nl = psSilk_VAD->NL[ k ];
+        silk_assert( nl >= 0 );
+
+        /* Add bias */
+        nrg = silk_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] );
+        silk_assert( nrg > 0 );
+
+        /* Invert energies */
+        inv_nrg = silk_DIV32( silk_int32_MAX, nrg );
+        silk_assert( inv_nrg >= 0 );
+
+        /* Less update when subband energy is high */
+        if( nrg > silk_LSHIFT( nl, 3 ) ) {
+            coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3;
+        } else if( nrg < nl ) {
+            coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16;
+        } else {
+            coef = silk_SMULWB( silk_SMULWW( inv_nrg, nl ), VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 << 1 );
+        }
+
+        /* Initially faster smoothing */
+        coef = silk_max_int( coef, min_coef );
+
+        /* Smooth inverse energies */
+        psSilk_VAD->inv_NL[ k ] = silk_SMLAWB( psSilk_VAD->inv_NL[ k ], inv_nrg - psSilk_VAD->inv_NL[ k ], coef );
+        silk_assert( psSilk_VAD->inv_NL[ k ] >= 0 );
+
+        /* Compute noise level by inverting again */
+        nl = silk_DIV32( silk_int32_MAX, psSilk_VAD->inv_NL[ k ] );
+        silk_assert( nl >= 0 );
+
+        /* Limit noise levels (guarantee 7 bits of head room) */
+        nl = silk_min( nl, 0x00FFFFFF );
+
+        /* Store as part of state */
+        psSilk_VAD->NL[ k ] = nl;
+    }
+
+    /* Increment frame counter */
+    psSilk_VAD->counter++;
+}
diff --git a/drivers/opus/silk/VQ_WMat_EC.c b/drivers/opus/silk/VQ_WMat_EC.c
new file mode 100644
index 0000000000..28c5fc7e6f
--- /dev/null
+++ b/drivers/opus/silk/VQ_WMat_EC.c
@@ -0,0 +1,120 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
+void silk_VQ_WMat_EC(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+)
+{
+    opus_int   k, gain_tmp_Q7;
+    const opus_int8 *cb_row_Q7;
+    opus_int16 diff_Q14[ 5 ];
+    opus_int32 sum1_Q14, sum2_Q16;
+
+    /* Loop over codebook */
+    *rate_dist_Q14 = silk_int32_MAX;
+    cb_row_Q7 = cb_Q7;
+    for( k = 0; k < L; k++ ) {
+	    gain_tmp_Q7 = cb_gain_Q7[k];
+
+        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
+        diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
+        diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
+        diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );
+        diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );
+
+        /* Weighted rate */
+        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );
+
+		/* Penalty for too large gain */
+		sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* first row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[  1 ], diff_Q14[ 1 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  2 ], diff_Q14[ 2 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  3 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  4 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  0 ], diff_Q14[ 0 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 0 ] );
+
+        /* second row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[  7 ], diff_Q14[ 2 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  8 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  9 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[  6 ], diff_Q14[ 1 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 1 ] );
+
+        /* third row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 13 ], diff_Q14[ 3 ] );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 2 ] );
+
+        /* fourth row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 19 ], diff_Q14[ 4 ] );
+        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
+        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 3 ] );
+
+        /* last row of W_Q18 */
+        sum2_Q16 = silk_SMULWB(           W_Q18[ 24 ], diff_Q14[ 4 ] );
+        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 4 ] );
+
+        silk_assert( sum1_Q14 >= 0 );
+
+        /* find best */
+        if( sum1_Q14 < *rate_dist_Q14 ) {
+            *rate_dist_Q14 = sum1_Q14;
+            *ind = (opus_int8)k;
+			*gain_Q7 = gain_tmp_Q7;
+        }
+
+        /* Go to next cbk vector */
+        cb_row_Q7 += LTP_ORDER;
+    }
+}
diff --git a/drivers/opus/silk/ana_filt_bank_1.c b/drivers/opus/silk/ana_filt_bank_1.c
new file mode 100644
index 0000000000..387dcd87e7
--- /dev/null
+++ b/drivers/opus/silk/ana_filt_bank_1.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Coefficients for 2-band filter bank based on first-order allpass filters */
+static opus_int16 A_fb1_20 = 5394 << 1;
+static opus_int16 A_fb1_21 = -24290; /* (opus_int16)(20623 << 1) */
+
+/* Split signal into two decimated bands using first-order allpass filters */
+void silk_ana_filt_bank_1(
+    const opus_int16            *in,                /* I    Input signal [N]                                            */
+    opus_int32                  *S,                 /* I/O  State vector [2]                                            */
+    opus_int16                  *outL,              /* O    Low band [N/2]                                              */
+    opus_int16                  *outH,              /* O    High band [N/2]                                             */
+    const opus_int32            N                   /* I    Number of input samples                                     */
+)
+{
+    opus_int      k, N2 = silk_RSHIFT( N, 1 );
+    opus_int32    in32, X, Y, out_1, out_2;
+
+    /* Internal variables and state are in Q10 format */
+    for( k = 0; k < N2; k++ ) {
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 );
+
+        /* All-pass section for even input sample */
+        Y      = silk_SUB32( in32, S[ 0 ] );
+        X      = silk_SMLAWB( Y, Y, A_fb1_21 );
+        out_1  = silk_ADD32( S[ 0 ], X );
+        S[ 0 ] = silk_ADD32( in32, X );
+
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 );
+
+        /* All-pass section for odd input sample, and add to output of previous section */
+        Y      = silk_SUB32( in32, S[ 1 ] );
+        X      = silk_SMULWB( Y, A_fb1_20 );
+        out_2  = silk_ADD32( S[ 1 ], X );
+        S[ 1 ] = silk_ADD32( in32, X );
+
+        /* Add/subtract, convert back to int16 and store to output */
+        outL[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_ADD32( out_2, out_1 ), 11 ) );
+        outH[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SUB32( out_2, out_1 ), 11 ) );
+    }
+}
diff --git a/drivers/opus/silk/arm/SigProc_FIX_armv4.h b/drivers/opus/silk/arm/SigProc_FIX_armv4.h
new file mode 100644
index 0000000000..ff62b1e5d6
--- /dev/null
+++ b/drivers/opus/silk/arm/SigProc_FIX_armv4.h
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv4_H
+#define SILK_SIGPROC_FIX_ARMv4_H
+
+#undef silk_MLA
+static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_MLA\n\t"
+      "mla %0, %1, %2, %3\n\t"
+      : "=&r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c))
+
+#endif
diff --git a/drivers/opus/silk/arm/SigProc_FIX_armv5e.h b/drivers/opus/silk/arm/SigProc_FIX_armv5e.h
new file mode 100644
index 0000000000..617a09cab1
--- /dev/null
+++ b/drivers/opus/silk/arm/SigProc_FIX_armv5e.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv5E_H
+#define SILK_SIGPROC_FIX_ARMv5E_H
+
+#undef silk_SMULTT
+static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_SMULTT\n\t"
+      "smultt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))
+
+#undef silk_SMLATT
+static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_SMLATT\n\t"
+      "smlatt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "%r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c))
+
+#endif
diff --git a/drivers/opus/silk/arm/macros_armv4.h b/drivers/opus/silk/arm/macros_armv4.h
new file mode 100644
index 0000000000..3f30e97288
--- /dev/null
+++ b/drivers/opus/silk/arm/macros_armv4.h
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv4_H
+#define SILK_MACROS_ARMv4_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
+#undef silk_SMULWB
+static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#silk_SMULWB\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(a), "r"(b<<16)
+  );
+  return rd_hi;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
+#undef silk_SMLAWB
+#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#undef silk_SMULWT
+static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#silk_SMULWT\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(a), "r"(b&~0xFFFF)
+  );
+  return rd_hi;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#undef silk_SMLAWT
+#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c))
+
+/* (a32 * b32) >> 16 */
+#undef silk_SMULWW
+static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+    "#silk_SMULWW\n\t"
+    "smull %0, %1, %2, %3\n\t"
+    : "=&r"(rd_lo), "=&r"(rd_hi)
+    : "%r"(a), "r"(b)
+  );
+  return (rd_hi<<16)+(rd_lo>>16);
+}
+#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
+
+#undef silk_SMLAWW
+static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+    "#silk_SMLAWW\n\t"
+    "smull %0, %1, %2, %3\n\t"
+    : "=&r"(rd_lo), "=&r"(rd_hi)
+    : "%r"(b), "r"(c)
+  );
+  return a+(rd_hi<<16)+(rd_lo>>16);
+}
+#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
+
+#endif /* SILK_MACROS_ARMv4_H */
diff --git a/drivers/opus/silk/arm/macros_armv5e.h b/drivers/opus/silk/arm/macros_armv5e.h
new file mode 100644
index 0000000000..aad4117e46
--- /dev/null
+++ b/drivers/opus/silk/arm/macros_armv5e.h
@@ -0,0 +1,213 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv5E_H
+#define SILK_MACROS_ARMv5E_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
+#undef silk_SMULWB
+static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULWB\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
+#undef silk_SMLAWB
+static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int16 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLAWB\n\t"
+      "smlawb %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#undef silk_SMULWT
+static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULWT\n\t"
+      "smulwt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#undef silk_SMLAWT
+static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLAWT\n\t"
+      "smlawt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
+#undef silk_SMULBB
+static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULBB\n\t"
+      "smulbb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
+#undef silk_SMLABB
+static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLABB\n\t"
+      "smlabb %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "%r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
+#undef silk_SMULBT
+static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULBT\n\t"
+      "smulbt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
+#undef silk_SMLABT
+static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLABT\n\t"
+      "smlabt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
+
+/* add/subtract with output saturated */
+#undef silk_ADD_SAT32
+static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_ADD_SAT32\n\t"
+      "qadd %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
+
+#undef silk_SUB_SAT32
+static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SUB_SAT32\n\t"
+      "qsub %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
+
+#undef silk_CLZ16
+static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
+{
+  int res;
+  __asm__(
+      "#silk_CLZ16\n\t"
+      "clz %0, %1;\n"
+      : "=r"(res)
+      : "r"(in16<<16|0x8000)
+  );
+  return res;
+}
+#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
+
+#undef silk_CLZ32
+static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
+{
+  int res;
+  __asm__(
+      "#silk_CLZ32\n\t"
+      "clz %0, %1\n\t"
+      : "=r"(res)
+      : "r"(in32)
+  );
+  return res;
+}
+#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
+
+#endif /* SILK_MACROS_ARMv5E_H */
diff --git a/drivers/opus/silk/biquad_alt.c b/drivers/opus/silk/biquad_alt.c
new file mode 100644
index 0000000000..5f1d6d25c3
--- /dev/null
+++ b/drivers/opus/silk/biquad_alt.c
@@ -0,0 +1,78 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/*                                                                      *
+ * silk_biquad_alt.c                                              *
+ *                                                                      *
+ * Second order ARMA filter                                             *
+ * Can handle slowly varying filter coefficients                        *
+ *                                                                      */
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Second order ARMA filter, alternative implementation */
+void silk_biquad_alt(
+    const opus_int16            *in,                /* I     input signal                                               */
+    const opus_int32            *B_Q28,             /* I     MA coefficients [3]                                        */
+    const opus_int32            *A_Q28,             /* I     AR coefficients [2]                                        */
+    opus_int32                  *S,                 /* I/O   State vector [2]                                           */
+    opus_int16                  *out,               /* O     output signal                                              */
+    const opus_int32            len,                /* I     signal length (must be even)                               */
+    opus_int                    stride              /* I     Operate on interleaved signal if > 1                       */
+)
+{
+    /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
+    opus_int   k;
+    opus_int32 inval, A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14;
+
+    /* Negate A_Q28 values and split in two parts */
+    A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF;        /* lower part */
+    A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 );      /* upper part */
+    A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF;        /* lower part */
+    A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 );      /* upper part */
+
+    for( k = 0; k < len; k++ ) {
+        /* S[ 0 ], S[ 1 ]: Q12 */
+        inval = in[ k * stride ];
+        out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 );
+
+        S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 );
+        S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14, A0_U_Q28 );
+        S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], inval);
+
+        S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A1_L_Q28 ), 14 );
+        S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14, A1_U_Q28 );
+        S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval );
+
+        /* Scale back to Q0 and saturate */
+        out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) );
+    }
+}
diff --git a/drivers/opus/silk/bwexpander.c b/drivers/opus/silk/bwexpander.c
new file mode 100644
index 0000000000..d757483872
--- /dev/null
+++ b/drivers/opus/silk/bwexpander.c
@@ -0,0 +1,51 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander(
+    opus_int16                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor (typically in the range 0 to 1)                */
+)
+{
+    opus_int   i;
+    opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536;
+
+    /* NB: Dont use silk_SMULWB, instead of silk_RSHIFT_ROUND( silk_MUL(), 16 ), below.  */
+    /* Bias in silk_SMULWB can lead to unstable filters                                */
+    for( i = 0; i < d - 1; i++ ) {
+        ar[ i ]    = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ]             ), 16 );
+        chirp_Q16 +=            silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
+    }
+    ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 );
+}
diff --git a/drivers/opus/silk/bwexpander_32.c b/drivers/opus/silk/bwexpander_32.c
new file mode 100644
index 0000000000..8a60767c0d
--- /dev/null
+++ b/drivers/opus/silk/bwexpander_32.c
@@ -0,0 +1,50 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Chirp (bandwidth expand) LP AR filter */
+void silk_bwexpander_32(
+    opus_int32                  *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int              d,                  /* I    Length of ar                                                */
+    opus_int32                  chirp_Q16           /* I    Chirp factor in Q16                                         */
+)
+{
+    opus_int   i;
+    opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536;
+
+    for( i = 0; i < d - 1; i++ ) {
+        ar[ i ]    = silk_SMULWW( chirp_Q16, ar[ i ] );
+        chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 );
+    }
+    ar[ d - 1 ] = silk_SMULWW( chirp_Q16, ar[ d - 1 ] );
+}
+
diff --git a/drivers/opus/silk/check_control_input.c b/drivers/opus/silk/check_control_input.c
new file mode 100644
index 0000000000..0e02fff22d
--- /dev/null
+++ b/drivers/opus/silk/check_control_input.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "control.h"
+#include "errors.h"
+
+/* Check encoder control struct */
+opus_int check_control_input(
+    silk_EncControlStruct        *encControl                    /* I    Control structure                           */
+)
+{
+    silk_assert( encControl != NULL );
+
+    if( ( ( encControl->API_sampleRate            !=  8000 ) &&
+          ( encControl->API_sampleRate            != 12000 ) &&
+          ( encControl->API_sampleRate            != 16000 ) &&
+          ( encControl->API_sampleRate            != 24000 ) &&
+          ( encControl->API_sampleRate            != 32000 ) &&
+          ( encControl->API_sampleRate            != 44100 ) &&
+          ( encControl->API_sampleRate            != 48000 ) ) ||
+        ( ( encControl->desiredInternalSampleRate !=  8000 ) &&
+          ( encControl->desiredInternalSampleRate != 12000 ) &&
+          ( encControl->desiredInternalSampleRate != 16000 ) ) ||
+        ( ( encControl->maxInternalSampleRate     !=  8000 ) &&
+          ( encControl->maxInternalSampleRate     != 12000 ) &&
+          ( encControl->maxInternalSampleRate     != 16000 ) ) ||
+        ( ( encControl->minInternalSampleRate     !=  8000 ) &&
+          ( encControl->minInternalSampleRate     != 12000 ) &&
+          ( encControl->minInternalSampleRate     != 16000 ) ) ||
+          ( encControl->minInternalSampleRate > encControl->desiredInternalSampleRate ) ||
+          ( encControl->maxInternalSampleRate < encControl->desiredInternalSampleRate ) ||
+          ( encControl->minInternalSampleRate > encControl->maxInternalSampleRate ) ) {
+        silk_assert( 0 );
+        return SILK_ENC_FS_NOT_SUPPORTED;
+    }
+    if( encControl->payloadSize_ms != 10 &&
+        encControl->payloadSize_ms != 20 &&
+        encControl->payloadSize_ms != 40 &&
+        encControl->payloadSize_ms != 60 ) {
+        silk_assert( 0 );
+        return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
+    }
+    if( encControl->packetLossPercentage < 0 || encControl->packetLossPercentage > 100 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_LOSS_RATE;
+    }
+    if( encControl->useDTX < 0 || encControl->useDTX > 1 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_DTX_SETTING;
+    }
+    if( encControl->useCBR < 0 || encControl->useCBR > 1 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_CBR_SETTING;
+    }
+    if( encControl->useInBandFEC < 0 || encControl->useInBandFEC > 1 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_INBAND_FEC_SETTING;
+    }
+    if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
+    }
+    if( encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
+    }
+    if( encControl->nChannelsInternal > encControl->nChannelsAPI ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR;
+    }
+    if( encControl->complexity < 0 || encControl->complexity > 10 ) {
+        silk_assert( 0 );
+        return SILK_ENC_INVALID_COMPLEXITY_SETTING;
+    }
+
+    return SILK_NO_ERROR;
+}
diff --git a/drivers/opus/silk/code_signs.c b/drivers/opus/silk/code_signs.c
new file mode 100644
index 0000000000..8bcc6ecde1
--- /dev/null
+++ b/drivers/opus/silk/code_signs.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/*#define silk_enc_map(a)                ((a) > 0 ? 1 : 0)*/
+/*#define silk_dec_map(a)                ((a) > 0 ? 1 : -1)*/
+/* shifting avoids if-statement */
+#define silk_enc_map(a)                  ( silk_RSHIFT( (a), 15 ) + 1 )
+#define silk_dec_map(a)                  ( silk_LSHIFT( (a),  1 ) - 1 )
+
+/* Encodes signs of excitation */
+void silk_encode_signs(
+    ec_enc                      *psRangeEnc,                        /* I/O  Compressor data structure                   */
+    const opus_int8             pulses[],                           /* I    pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+)
+{
+    opus_int         i, j, p;
+    opus_uint8       icdf[ 2 ];
+    const opus_int8  *q_ptr;
+    const opus_uint8 *icdf_ptr;
+
+    icdf[ 1 ] = 0;
+    q_ptr = pulses;
+    i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) );
+    icdf_ptr = &silk_sign_iCDF[ i ];
+    length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    for( i = 0; i < length; i++ ) {
+        p = sum_pulses[ i ];
+        if( p > 0 ) {
+            icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ];
+            for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) {
+                if( q_ptr[ j ] != 0 ) {
+                    ec_enc_icdf( psRangeEnc, silk_enc_map( q_ptr[ j ]), icdf, 8 );
+                }
+            }
+        }
+        q_ptr += SHELL_CODEC_FRAME_LENGTH;
+    }
+}
+
+/* Decodes signs of excitation */
+void silk_decode_signs(
+    ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+)
+{
+    opus_int         i, j, p;
+    opus_uint8       icdf[ 2 ];
+    opus_int         *q_ptr;
+    const opus_uint8 *icdf_ptr;
+
+    icdf[ 1 ] = 0;
+    q_ptr = pulses;
+    i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) );
+    icdf_ptr = &silk_sign_iCDF[ i ];
+    length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    for( i = 0; i < length; i++ ) {
+        p = sum_pulses[ i ];
+        if( p > 0 ) {
+            icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ];
+            for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) {
+                if( q_ptr[ j ] > 0 ) {
+                    /* attach sign */
+#if 0
+                    /* conditional implementation */
+                    if( ec_dec_icdf( psRangeDec, icdf, 8 ) == 0 ) {
+                        q_ptr[ j ] = -q_ptr[ j ];
+                    }
+#else
+                    /* implementation with shift, subtraction, multiplication */
+                    q_ptr[ j ] *= silk_dec_map( ec_dec_icdf( psRangeDec, icdf, 8 ) );
+#endif
+                }
+            }
+        }
+        q_ptr += SHELL_CODEC_FRAME_LENGTH;
+    }
+}
diff --git a/drivers/opus/silk/control.h b/drivers/opus/silk/control.h
new file mode 100644
index 0000000000..747e5426a0
--- /dev/null
+++ b/drivers/opus/silk/control.h
@@ -0,0 +1,142 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_CONTROL_H
+#define SILK_CONTROL_H
+
+#include "typedef.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Decoder API flags */
+#define FLAG_DECODE_NORMAL                      0
+#define FLAG_PACKET_LOST                        1
+#define FLAG_DECODE_LBRR                        2
+
+/***********************************************/
+/* Structure for controlling encoder operation */
+/***********************************************/
+typedef struct {
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsAPI;
+
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsInternal;
+
+    /* I:   Input signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000   */
+    opus_int32 API_sampleRate;
+
+    /* I:   Maximum internal sampling rate in Hertz; 8000/12000/16000                       */
+    opus_int32 maxInternalSampleRate;
+
+    /* I:   Minimum internal sampling rate in Hertz; 8000/12000/16000                       */
+    opus_int32 minInternalSampleRate;
+
+    /* I:   Soft request for internal sampling rate in Hertz; 8000/12000/16000              */
+    opus_int32 desiredInternalSampleRate;
+
+    /* I:   Number of samples per packet in milliseconds; 10/20/40/60                       */
+    opus_int payloadSize_ms;
+
+    /* I:   Bitrate during active speech in bits/second; internally limited                 */
+    opus_int32 bitRate;
+
+    /* I:   Uplink packet loss in percent (0-100)                                           */
+    opus_int packetLossPercentage;
+
+    /* I:   Complexity mode; 0 is lowest, 10 is highest complexity                          */
+    opus_int complexity;
+
+    /* I:   Flag to enable in-band Forward Error Correction (FEC); 0/1                      */
+    opus_int useInBandFEC;
+
+    /* I:   Flag to enable discontinuous transmission (DTX); 0/1                            */
+    opus_int useDTX;
+
+    /* I:   Flag to use constant bitrate                                                    */
+    opus_int useCBR;
+
+    /* I:   Maximum number of bits allowed for the frame                                    */
+    opus_int maxBits;
+
+    /* I:   Causes a smooth downmix to mono                                                 */
+    opus_int toMono;
+
+    /* I:   Opus encoder is allowing us to switch bandwidth                                 */
+    opus_int opusCanSwitch;
+
+    /* I: Make frames as independent as possible (but still use LPC)                        */
+    opus_int reducedDependency;
+
+    /* O:   Internal sampling rate used, in Hertz; 8000/12000/16000                         */
+    opus_int32 internalSampleRate;
+
+    /* O: Flag that bandwidth switching is allowed (because low voice activity)             */
+    opus_int allowBandwidthSwitch;
+
+    /* O:   Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */
+    opus_int inWBmodeWithoutVariableLP;
+
+    /* O:   Stereo width */
+    opus_int stereoWidth_Q14;
+
+    /* O:   Tells the Opus encoder we're ready to switch                                    */
+    opus_int switchReady;
+
+} silk_EncControlStruct;
+
+/**************************************************************************/
+/* Structure for controlling decoder operation and reading decoder status */
+/**************************************************************************/
+typedef struct {
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsAPI;
+
+    /* I:   Number of channels; 1/2                                                         */
+    opus_int32 nChannelsInternal;
+
+    /* I:   Output signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000  */
+    opus_int32 API_sampleRate;
+
+    /* I:   Internal sampling rate used, in Hertz; 8000/12000/16000                         */
+    opus_int32 internalSampleRate;
+
+    /* I:   Number of samples per packet in milliseconds; 10/20/40/60                       */
+    opus_int payloadSize_ms;
+
+    /* O:   Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz      */
+    opus_int prevPitchLag;
+} silk_DecControlStruct;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/control_SNR.c b/drivers/opus/silk/control_SNR.c
new file mode 100644
index 0000000000..ae6351798b
--- /dev/null
+++ b/drivers/opus/silk/control_SNR.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "tuning_parameters.h"
+
+/* Control SNR of redidual quantizer */
+opus_int silk_control_SNR(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    opus_int32                  TargetRate_bps                  /* I    Target max bitrate (bps)                    */
+)
+{
+    opus_int k, ret = SILK_NO_ERROR;
+    opus_int32 frac_Q6;
+    const opus_int32 *rateTable;
+
+    /* Set bitrate/coding quality */
+    TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS );
+    if( TargetRate_bps != psEncC->TargetRate_bps ) {
+        psEncC->TargetRate_bps = TargetRate_bps;
+
+        /* If new TargetRate_bps, translate to SNR_dB value */
+        if( psEncC->fs_kHz == 8 ) {
+            rateTable = silk_TargetRate_table_NB;
+        } else if( psEncC->fs_kHz == 12 ) {
+            rateTable = silk_TargetRate_table_MB;
+        } else {
+            rateTable = silk_TargetRate_table_WB;
+        }
+
+        /* Reduce bitrate for 10 ms modes in these calculations */
+        if( psEncC->nb_subfr == 2 ) {
+            TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS;
+        }
+
+        /* Find bitrate interval in table and interpolate */
+        for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) {
+            if( TargetRate_bps <= rateTable[ k ] ) {
+                frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ),
+                                                 rateTable[ k ] - rateTable[ k - 1 ] );
+                psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] );
+                break;
+            }
+        }
+
+        /* Reduce coding quality whenever LBRR is enabled, to free up some bits */
+        if( psEncC->LBRR_enabled ) {
+            psEncC->SNR_dB_Q7 = silk_SMLABB( psEncC->SNR_dB_Q7, 12 - psEncC->LBRR_GainIncreases, SILK_FIX_CONST( -0.25, 7 ) );
+        }
+    }
+
+    return ret;
+}
diff --git a/drivers/opus/silk/control_audio_bandwidth.c b/drivers/opus/silk/control_audio_bandwidth.c
new file mode 100644
index 0000000000..6f060bbd29
--- /dev/null
+++ b/drivers/opus/silk/control_audio_bandwidth.c
@@ -0,0 +1,126 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "tuning_parameters.h"
+
+/* Control internal sampling rate */
+opus_int silk_control_audio_bandwidth(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    silk_EncControlStruct       *encControl                     /* I    Control structure                           */
+)
+{
+    opus_int   fs_kHz;
+    opus_int32 fs_Hz;
+
+    fs_kHz = psEncC->fs_kHz;
+    fs_Hz = silk_SMULBB( fs_kHz, 1000 );
+    if( fs_Hz == 0 ) {
+        /* Encoder has just been initialized */
+        fs_Hz  = silk_min( psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz );
+        fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+    } else if( fs_Hz > psEncC->API_fs_Hz || fs_Hz > psEncC->maxInternal_fs_Hz || fs_Hz < psEncC->minInternal_fs_Hz ) {
+        /* Make sure internal rate is not higher than external rate or maximum allowed, or lower than minimum allowed */
+        fs_Hz  = psEncC->API_fs_Hz;
+        fs_Hz  = silk_min( fs_Hz, psEncC->maxInternal_fs_Hz );
+        fs_Hz  = silk_max( fs_Hz, psEncC->minInternal_fs_Hz );
+        fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+    } else {
+        /* State machine for the internal sampling rate switching */
+        if( psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES ) {
+            /* Stop transition phase */
+            psEncC->sLP.mode = 0;
+        }
+        if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) {
+            /* Check if we should switch down */
+            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz )
+            {
+                /* Switch down */
+                if( psEncC->sLP.mode == 0 ) {
+                    /* New transition */
+                    psEncC->sLP.transition_frame_no = TRANSITION_FRAMES;
+
+                    /* Reset transition filter state */
+                    silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+                }
+                if( encControl->opusCanSwitch ) {
+                    /* Stop transition phase */
+                    psEncC->sLP.mode = 0;
+
+                    /* Switch to a lower sample frequency */
+                    fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8;
+                } else {
+                   if( psEncC->sLP.transition_frame_no <= 0 ) {
+                       encControl->switchReady = 1;
+                       /* Make room for redundancy */
+                       encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+                   } else {
+                       /* Direction: down (at double speed) */
+                       psEncC->sLP.mode = -2;
+                   }
+                }
+            }
+            else
+            /* Check if we should switch up */
+            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz )
+            {
+                /* Switch up */
+                if( encControl->opusCanSwitch ) {
+                    /* Switch to a higher sample frequency */
+                    fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16;
+
+                    /* New transition */
+                    psEncC->sLP.transition_frame_no = 0;
+
+                    /* Reset transition filter state */
+                    silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+
+                    /* Direction: up */
+                    psEncC->sLP.mode = 1;
+                } else {
+                   if( psEncC->sLP.mode == 0 ) {
+                       encControl->switchReady = 1;
+                       /* Make room for redundancy */
+                       encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+                   } else {
+                       /* Direction: up */
+                       psEncC->sLP.mode = 1;
+                   }
+                }
+            } else {
+               if (psEncC->sLP.mode<0)
+                  psEncC->sLP.mode = 1;
+            }
+        }
+    }
+
+    return fs_kHz;
+}
diff --git a/drivers/opus/silk/control_codec.c b/drivers/opus/silk/control_codec.c
new file mode 100644
index 0000000000..2d7b10e9b7
--- /dev/null
+++ b/drivers/opus/silk/control_codec.c
@@ -0,0 +1,422 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#define silk_encoder_state_Fxx      silk_encoder_state_FIX
+#else
+#include "main_FLP.h"
+#define silk_encoder_state_Fxx      silk_encoder_state_FLP
+#endif
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+#include "pitch_est_defines.h"
+
+static opus_int silk_setup_resamplers(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz              /* I                        */
+);
+
+static opus_int silk_setup_fs(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz,             /* I                        */
+    opus_int                        PacketSize_ms       /* I                        */
+);
+
+static opus_int silk_setup_complexity(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    opus_int                        Complexity          /* I                        */
+);
+
+static OPUS_INLINE opus_int silk_setup_LBRR(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    const opus_int32                TargetRate_bps      /* I                        */
+);
+
+
+/* Control encoder */
+opus_int silk_control_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk encoder state                                               */
+    silk_EncControlStruct           *encControl,                            /* I    Control structure                                                           */
+    const opus_int32                TargetRate_bps,                         /* I    Target max bitrate (bps)                                                    */
+    const opus_int                  allow_bw_switch,                        /* I    Flag to allow switching audio bandwidth                                     */
+    const opus_int                  channelNb,                              /* I    Channel number                                                              */
+    const opus_int                  force_fs_kHz
+)
+{
+    opus_int   fs_kHz, ret = 0;
+
+    psEnc->sCmn.useDTX                 = encControl->useDTX;
+    psEnc->sCmn.useCBR                 = encControl->useCBR;
+    psEnc->sCmn.API_fs_Hz              = encControl->API_sampleRate;
+    psEnc->sCmn.maxInternal_fs_Hz      = encControl->maxInternalSampleRate;
+    psEnc->sCmn.minInternal_fs_Hz      = encControl->minInternalSampleRate;
+    psEnc->sCmn.desiredInternal_fs_Hz  = encControl->desiredInternalSampleRate;
+    psEnc->sCmn.useInBandFEC           = encControl->useInBandFEC;
+    psEnc->sCmn.nChannelsAPI           = encControl->nChannelsAPI;
+    psEnc->sCmn.nChannelsInternal      = encControl->nChannelsInternal;
+    psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch;
+    psEnc->sCmn.channelNb              = channelNb;
+
+    if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) {
+        if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) {
+            /* Change in API sampling rate in the middle of encoding a packet */
+            ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz );
+        }
+        return ret;
+    }
+
+    /* Beyond this point we know that there are no previously coded frames in the payload buffer */
+
+    /********************************************/
+    /* Determine internal sampling rate         */
+    /********************************************/
+    fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl );
+    if( force_fs_kHz ) {
+       fs_kHz = force_fs_kHz;
+    }
+    /********************************************/
+    /* Prepare resampler and buffered data      */
+    /********************************************/
+    ret += silk_setup_resamplers( psEnc, fs_kHz );
+
+    /********************************************/
+    /* Set internal sampling frequency          */
+    /********************************************/
+    ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms );
+
+    /********************************************/
+    /* Set encoding complexity                  */
+    /********************************************/
+    ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity  );
+
+    /********************************************/
+    /* Set packet loss rate measured by farend  */
+    /********************************************/
+    psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage;
+
+    /********************************************/
+    /* Set LBRR usage                           */
+    /********************************************/
+    ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps );
+
+    psEnc->sCmn.controlled_since_last_payload = 1;
+
+    return ret;
+}
+
+static opus_int silk_setup_resamplers(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                         fs_kHz              /* I                        */
+)
+{
+    opus_int   ret = SILK_NO_ERROR;
+    SAVE_STACK;
+
+    if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
+    {
+        if( psEnc->sCmn.fs_kHz == 0 ) {
+            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
+        } else {
+            VARDECL( opus_int16, x_buf_API_fs_Hz );
+            VARDECL( silk_resampler_state_struct, temp_resampler_state );
+#ifdef OPUS_FIXED_POINT
+            opus_int16 *x_bufFIX = psEnc->x_buf;
+#else
+            VARDECL( opus_int16, x_bufFIX );
+            opus_int32 new_buf_samples;
+#endif
+            opus_int32 api_buf_samples;
+            opus_int32 old_buf_samples;
+            opus_int32 buf_length_ms;
+
+            buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS;
+            old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz;
+
+#ifndef OPUS_FIXED_POINT
+            new_buf_samples = buf_length_ms * fs_kHz;
+            ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ),
+                   opus_int16 );
+            silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples );
+#endif
+
+            /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
+            ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );
+            ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
+
+            /* Calculate number of samples to temporarily upsample */
+            api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 );
+
+            /* Temporary resampling of x_buf data to API_fs_Hz */
+            ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 );
+            ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples );
+
+            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );
+
+            /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
+            ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples );
+
+#ifndef OPUS_FIXED_POINT
+            silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples);
+#endif
+        }
+    }
+
+    psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;
+
+    RESTORE_STACK;
+    return ret;
+}
+
+static opus_int silk_setup_fs(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz,             /* I                        */
+    opus_int                        PacketSize_ms       /* I                        */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+
+    /* Set packet size */
+    if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
+        if( ( PacketSize_ms !=  10 ) &&
+            ( PacketSize_ms !=  20 ) &&
+            ( PacketSize_ms !=  40 ) &&
+            ( PacketSize_ms !=  60 ) ) {
+            ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
+        }
+        if( PacketSize_ms <= 10 ) {
+            psEnc->sCmn.nFramesPerPacket = 1;
+            psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1;
+            psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz );
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+            if( psEnc->sCmn.fs_kHz == 8 ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        } else {
+            psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS );
+            psEnc->sCmn.nb_subfr = MAX_NB_SUBFR;
+            psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz );
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+            if( psEnc->sCmn.fs_kHz == 8 ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            }
+        }
+        psEnc->sCmn.PacketSize_ms  = PacketSize_ms;
+        psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */
+    }
+
+    /* Set internal sampling frequency */
+    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+    silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
+    if( psEnc->sCmn.fs_kHz != fs_kHz ) {
+        /* reset part of the state */
+        silk_memset( &psEnc->sShape,               0, sizeof( psEnc->sShape ) );
+        silk_memset( &psEnc->sPrefilt,             0, sizeof( psEnc->sPrefilt ) );
+        silk_memset( &psEnc->sCmn.sNSQ,            0, sizeof( psEnc->sCmn.sNSQ ) );
+        silk_memset( psEnc->sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+        silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
+        psEnc->sCmn.inputBufIx                  = 0;
+        psEnc->sCmn.nFramesEncoded              = 0;
+        psEnc->sCmn.TargetRate_bps              = 0;     /* trigger new SNR computation */
+
+        /* Initialize non-zero parameters */
+        psEnc->sCmn.prevLag                     = 100;
+        psEnc->sCmn.first_frame_after_reset     = 1;
+        psEnc->sPrefilt.lagPrev                 = 100;
+        psEnc->sShape.LastGainIndex             = 10;
+        psEnc->sCmn.sNSQ.lagPrev                = 100;
+        psEnc->sCmn.sNSQ.prev_gain_Q16          = 65536;
+        psEnc->sCmn.prevSignalType              = TYPE_NO_VOICE_ACTIVITY;
+
+        psEnc->sCmn.fs_kHz = fs_kHz;
+        if( psEnc->sCmn.fs_kHz == 8 ) {
+            if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            }
+        } else {
+            if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        }
+        if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) {
+            psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
+            psEnc->sCmn.psNLSF_CB  = &silk_NLSF_CB_NB_MB;
+        } else {
+            psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
+            psEnc->sCmn.psNLSF_CB  = &silk_NLSF_CB_WB;
+        }
+        psEnc->sCmn.subfr_length   = SUB_FRAME_LENGTH_MS * fs_kHz;
+        psEnc->sCmn.frame_length   = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
+        psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+        psEnc->sCmn.la_pitch       = silk_SMULBB( LA_PITCH_MS, fs_kHz );
+        psEnc->sCmn.max_pitch_lag  = silk_SMULBB( 18, fs_kHz );
+        if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+        } else {
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+        }
+        if( psEnc->sCmn.fs_kHz == 16 ) {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+        } else if( psEnc->sCmn.fs_kHz == 12 ) {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+        } else {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+        }
+    }
+
+    /* Check that settings are valid */
+    silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
+
+    return ret;
+}
+
+static opus_int silk_setup_complexity(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    opus_int                        Complexity          /* I                        */
+)
+{
+    opus_int ret = 0;
+
+    /* Set encoding complexity */
+    silk_assert( Complexity >= 0 && Complexity <= 10 );
+    if( Complexity < 2 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MIN_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.8, 16 );
+        psEncC->pitchEstimationLPCOrder         = 6;
+        psEncC->shapingLPCOrder                 = 8;
+        psEncC->la_shape                        = 3 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 1;
+        psEncC->useInterpolatedNLSFs            = 0;
+        psEncC->LTPQuantLowComplexity           = 1;
+        psEncC->NLSF_MSVQ_Survivors             = 2;
+        psEncC->warping_Q16                     = 0;
+    } else if( Complexity < 4 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.76, 16 );
+        psEncC->pitchEstimationLPCOrder         = 8;
+        psEncC->shapingLPCOrder                 = 10;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 1;
+        psEncC->useInterpolatedNLSFs            = 0;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 4;
+        psEncC->warping_Q16                     = 0;
+    } else if( Complexity < 6 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.74, 16 );
+        psEncC->pitchEstimationLPCOrder         = 10;
+        psEncC->shapingLPCOrder                 = 12;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 2;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 8;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    } else if( Complexity < 8 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.72, 16 );
+        psEncC->pitchEstimationLPCOrder         = 12;
+        psEncC->shapingLPCOrder                 = 14;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 3;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 16;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    } else {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MAX_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.7, 16 );
+        psEncC->pitchEstimationLPCOrder         = 16;
+        psEncC->shapingLPCOrder                 = 16;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = MAX_DEL_DEC_STATES;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 32;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    }
+
+    /* Do not allow higher pitch estimation LPC order than predict LPC order */
+    psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder );
+    psEncC->shapeWinLength          = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape;
+    psEncC->Complexity              = Complexity;
+
+    silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
+    silk_assert( psEncC->shapingLPCOrder         <= MAX_SHAPE_LPC_ORDER      );
+    silk_assert( psEncC->nStatesDelayedDecision  <= MAX_DEL_DEC_STATES       );
+    silk_assert( psEncC->warping_Q16             <= 32767                    );
+    silk_assert( psEncC->la_shape                <= LA_SHAPE_MAX             );
+    silk_assert( psEncC->shapeWinLength          <= SHAPE_LPC_WIN_MAX        );
+    silk_assert( psEncC->NLSF_MSVQ_Survivors     <= NLSF_VQ_MAX_SURVIVORS    );
+
+    return ret;
+}
+
+static OPUS_INLINE opus_int silk_setup_LBRR(
+    silk_encoder_state          *psEncC,            /* I/O                      */
+    const opus_int32            TargetRate_bps      /* I                        */
+)
+{
+    opus_int   ret = SILK_NO_ERROR;
+    opus_int32 LBRR_rate_thres_bps;
+
+    psEncC->LBRR_enabled = 0;
+    if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
+        if( psEncC->fs_kHz == 8 ) {
+            LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS;
+        } else if( psEncC->fs_kHz == 12 ) {
+            LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS;
+        } else {
+            LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS;
+        }
+        LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) );
+
+        if( TargetRate_bps > LBRR_rate_thres_bps ) {
+            /* Set gain increase for coding LBRR excitation */
+            psEncC->LBRR_enabled = 1;
+            psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
+        }
+    }
+
+    return ret;
+}
diff --git a/drivers/opus/silk/debug.c b/drivers/opus/silk/debug.c
new file mode 100644
index 0000000000..2230813fae
--- /dev/null
+++ b/drivers/opus/silk/debug.c
@@ -0,0 +1,170 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "debug.h"
+#include "SigProc_FIX.h"
+
+#if SILK_TIC_TOC
+
+#ifdef _WIN32
+
+#if (defined(_WIN32) || defined(_WINCE))
+#include <windows.h>    /* timer */
+#else   /* Linux or Mac*/
+#include <sys/time.h>
+#endif
+
+unsigned long silk_GetHighResolutionTime(void) /* O  time in usec*/
+{
+    /* Returns a time counter in microsec   */
+    /* the resolution is platform dependent */
+    /* but is typically 1.62 us resolution  */
+    LARGE_INTEGER lpPerformanceCount;
+    LARGE_INTEGER lpFrequency;
+    QueryPerformanceCounter(&lpPerformanceCount);
+    QueryPerformanceFrequency(&lpFrequency);
+    return (unsigned long)((1000000*(lpPerformanceCount.QuadPart)) / lpFrequency.QuadPart);
+}
+#else   /* Linux or Mac*/
+unsigned long GetHighResolutionTime(void) /* O  time in usec*/
+{
+    struct timeval tv;
+    gettimeofday(&tv, 0);
+    return((tv.tv_sec*1000000)+(tv.tv_usec));
+}
+#endif
+
+int           silk_Timer_nTimers = 0;
+int           silk_Timer_depth_ctr = 0;
+char          silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN];
+#ifdef WIN32
+LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX];
+#else
+unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX];
+#endif
+unsigned int  silk_Timer_cnt[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_min[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_sum[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_max[silk_NUM_TIMERS_MAX];
+opus_int64     silk_Timer_depth[silk_NUM_TIMERS_MAX];
+
+#ifdef WIN32
+void silk_TimerSave(char *file_name)
+{
+    if( silk_Timer_nTimers > 0 )
+    {
+        int k;
+        FILE *fp;
+        LARGE_INTEGER lpFrequency;
+        LARGE_INTEGER lpPerformanceCount1, lpPerformanceCount2;
+        int del = 0x7FFFFFFF;
+        double avg, sum_avg;
+        /* estimate overhead of calling performance counters */
+        for( k = 0; k < 1000; k++ ) {
+            QueryPerformanceCounter(&lpPerformanceCount1);
+            QueryPerformanceCounter(&lpPerformanceCount2);
+            lpPerformanceCount2.QuadPart -= lpPerformanceCount1.QuadPart;
+            if( (int)lpPerformanceCount2.LowPart < del )
+                del = lpPerformanceCount2.LowPart;
+        }
+        QueryPerformanceFrequency(&lpFrequency);
+        /* print results to file */
+        sum_avg = 0.0f;
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {
+            if (silk_Timer_depth[k] == 0) {
+                sum_avg += (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart * silk_Timer_cnt[k];
+            }
+        }
+        fp = fopen(file_name, "w");
+        fprintf(fp, "                                min         avg     %%         max      count\n");
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {
+            if (silk_Timer_depth[k] == 0) {
+                fprintf(fp, "%-28s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 1) {
+                fprintf(fp, " %-27s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 2) {
+                fprintf(fp, "  %-26s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 3) {
+                fprintf(fp, "   %-25s", silk_Timer_tags[k]);
+            } else {
+                fprintf(fp, "    %-24s", silk_Timer_tags[k]);
+            }
+            avg = (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart;
+            fprintf(fp, "%8.2f", (1e6 * (silk_max_64(silk_Timer_min[k] - del, 0))) / lpFrequency.QuadPart);
+            fprintf(fp, "%12.2f %6.2f", avg, 100.0 * avg / sum_avg * silk_Timer_cnt[k]);
+            fprintf(fp, "%12.2f", (1e6 * (silk_max_64(silk_Timer_max[k] - del, 0))) / lpFrequency.QuadPart);
+            fprintf(fp, "%10d\n", silk_Timer_cnt[k]);
+        }
+        fprintf(fp, "                                microseconds\n");
+        fclose(fp);
+    }
+}
+#else
+void silk_TimerSave(char *file_name)
+{
+    if( silk_Timer_nTimers > 0 )
+    {
+        int k;
+        FILE *fp;
+        /* print results to file */
+        fp = fopen(file_name, "w");
+        fprintf(fp, "                                min         avg         max      count\n");
+        for( k = 0; k < silk_Timer_nTimers; k++ )
+        {
+            if (silk_Timer_depth[k] == 0) {
+                fprintf(fp, "%-28s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 1) {
+                fprintf(fp, " %-27s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 2) {
+                fprintf(fp, "  %-26s", silk_Timer_tags[k]);
+            } else if (silk_Timer_depth[k] == 3) {
+                fprintf(fp, "   %-25s", silk_Timer_tags[k]);
+            } else {
+                fprintf(fp, "    %-24s", silk_Timer_tags[k]);
+            }
+            fprintf(fp, "%d ", silk_Timer_min[k]);
+            fprintf(fp, "%f ", (double)silk_Timer_sum[k] / (double)silk_Timer_cnt[k]);
+            fprintf(fp, "%d ", silk_Timer_max[k]);
+            fprintf(fp, "%10d\n", silk_Timer_cnt[k]);
+        }
+        fprintf(fp, "                                microseconds\n");
+        fclose(fp);
+    }
+}
+#endif
+
+#endif /* SILK_TIC_TOC */
+
+#if SILK_DEBUG
+FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ];
+int silk_debug_store_count = 0;
+#endif /* SILK_DEBUG */
+
diff --git a/drivers/opus/silk/debug.h b/drivers/opus/silk/debug.h
new file mode 100644
index 0000000000..efb6d3e99e
--- /dev/null
+++ b/drivers/opus/silk/debug.h
@@ -0,0 +1,279 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_DEBUG_H
+#define SILK_DEBUG_H
+
+#include "typedef.h"
+#include <stdio.h>      /* file writing */
+#include <string.h>     /* strcpy, strcmp */
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+unsigned long GetHighResolutionTime(void); /* O  time in usec*/
+
+/* make SILK_DEBUG dependent on compiler's _DEBUG */
+#if defined _WIN32
+    #ifdef _DEBUG
+        #define SILK_DEBUG  1
+    #else
+        #define SILK_DEBUG  0
+    #endif
+
+    /* overrule the above */
+    #if 0
+    /*  #define NO_ASSERTS*/
+    #undef  SILK_DEBUG
+    #define SILK_DEBUG  1
+    #endif
+#else
+    #define SILK_DEBUG  0
+#endif
+
+/* Flag for using timers */
+#define SILK_TIC_TOC    0
+
+
+#if SILK_TIC_TOC
+
+#if (defined(_WIN32) || defined(_WINCE))
+#include <windows.h>    /* timer */
+#else   /* Linux or Mac*/
+#include <sys/time.h>
+#endif
+
+/*********************************/
+/* timer functions for profiling */
+/*********************************/
+/* example:                                                         */
+/*                                                                  */
+/* TIC(LPC)                                                         */
+/* do_LPC(in_vec, order, acoef);    // do LPC analysis              */
+/* TOC(LPC)                                                         */
+/*                                                                  */
+/* and call the following just before exiting (from main)           */
+/*                                                                  */
+/* silk_TimerSave("silk_TimingData.txt");                           */
+/*                                                                  */
+/* results are now in silk_TimingData.txt                           */
+
+void silk_TimerSave(char *file_name);
+
+/* max number of timers (in different locations) */
+#define silk_NUM_TIMERS_MAX                  50
+/* max length of name tags in TIC(..), TOC(..) */
+#define silk_NUM_TIMERS_MAX_TAG_LEN          30
+
+extern int           silk_Timer_nTimers;
+extern int           silk_Timer_depth_ctr;
+extern char          silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN];
+#ifdef _WIN32
+extern LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX];
+#else
+extern unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX];
+#endif
+extern unsigned int  silk_Timer_cnt[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_sum[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_max[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_min[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_depth[silk_NUM_TIMERS_MAX];
+
+/* WARNING: TIC()/TOC can measure only up to 0.1 seconds at a time */
+#ifdef _WIN32
+#define TIC(TAG_NAME) {                                     \
+    static int init = 0;                                    \
+    static int ID = -1;                                     \
+    if( init == 0 )                                         \
+    {                                                       \
+        int k;                                              \
+        init = 1;                                           \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {         \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \
+                ID = k;                                     \
+                break;                                      \
+            }                                               \
+        }                                                   \
+        if (ID == -1) {                                     \
+            ID = silk_Timer_nTimers;                        \
+            silk_Timer_nTimers++;                           \
+            silk_Timer_depth[ID] = silk_Timer_depth_ctr;    \
+            strcpy(silk_Timer_tags[ID], #TAG_NAME);         \
+            silk_Timer_cnt[ID] = 0;                         \
+            silk_Timer_sum[ID] = 0;                         \
+            silk_Timer_min[ID] = 0xFFFFFFFF;                \
+            silk_Timer_max[ID] = 0;                         \
+        }                                                   \
+    }                                                       \
+    silk_Timer_depth_ctr++;                                 \
+    QueryPerformanceCounter(&silk_Timer_start[ID]);         \
+}
+#else
+#define TIC(TAG_NAME) {                                     \
+    static int init = 0;                                    \
+    static int ID = -1;                                     \
+    if( init == 0 )                                         \
+    {                                                       \
+        int k;                                              \
+        init = 1;                                           \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {         \
+        if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {  \
+                ID = k;                                     \
+                break;                                      \
+            }                                               \
+        }                                                   \
+        if (ID == -1) {                                     \
+            ID = silk_Timer_nTimers;                        \
+            silk_Timer_nTimers++;                           \
+            silk_Timer_depth[ID] = silk_Timer_depth_ctr;    \
+            strcpy(silk_Timer_tags[ID], #TAG_NAME);         \
+            silk_Timer_cnt[ID] = 0;                         \
+            silk_Timer_sum[ID] = 0;                         \
+            silk_Timer_min[ID] = 0xFFFFFFFF;                \
+            silk_Timer_max[ID] = 0;                         \
+        }                                                   \
+    }                                                       \
+    silk_Timer_depth_ctr++;                                 \
+    silk_Timer_start[ID] = GetHighResolutionTime();         \
+}
+#endif
+
+#ifdef _WIN32
+#define TOC(TAG_NAME) {                                             \
+    LARGE_INTEGER lpPerformanceCount;                               \
+    static int init = 0;                                            \
+    static int ID = 0;                                              \
+    if( init == 0 )                                                 \
+    {                                                               \
+        int k;                                                      \
+        init = 1;                                                   \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {                 \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {      \
+                ID = k;                                             \
+                break;                                              \
+            }                                                       \
+        }                                                           \
+    }                                                               \
+    QueryPerformanceCounter(&lpPerformanceCount);                   \
+    lpPerformanceCount.QuadPart -= silk_Timer_start[ID].QuadPart;   \
+    if((lpPerformanceCount.QuadPart < 100000000) &&                 \
+        (lpPerformanceCount.QuadPart >= 0)) {                       \
+        silk_Timer_cnt[ID]++;                                       \
+        silk_Timer_sum[ID] += lpPerformanceCount.QuadPart;          \
+        if( lpPerformanceCount.QuadPart > silk_Timer_max[ID] )      \
+            silk_Timer_max[ID] = lpPerformanceCount.QuadPart;       \
+        if( lpPerformanceCount.QuadPart < silk_Timer_min[ID] )      \
+            silk_Timer_min[ID] = lpPerformanceCount.QuadPart;       \
+    }                                                               \
+    silk_Timer_depth_ctr--;                                         \
+}
+#else
+#define TOC(TAG_NAME) {                                             \
+    unsigned long endTime;                                          \
+    static int init = 0;                                            \
+    static int ID = 0;                                              \
+    if( init == 0 )                                                 \
+    {                                                               \
+        int k;                                                      \
+        init = 1;                                                   \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {                 \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {      \
+                ID = k;                                             \
+                break;                                              \
+            }                                                       \
+        }                                                           \
+    }                                                               \
+    endTime = GetHighResolutionTime();                              \
+    endTime -= silk_Timer_start[ID];                                \
+    if((endTime < 100000000) &&                                     \
+        (endTime >= 0)) {                                           \
+        silk_Timer_cnt[ID]++;                                       \
+        silk_Timer_sum[ID] += endTime;                              \
+        if( endTime > silk_Timer_max[ID] )                          \
+            silk_Timer_max[ID] = endTime;                           \
+        if( endTime < silk_Timer_min[ID] )                          \
+            silk_Timer_min[ID] = endTime;                           \
+    }                                                               \
+        silk_Timer_depth_ctr--;                                     \
+}
+#endif
+
+#else /* SILK_TIC_TOC */
+
+/* define macros as empty strings */
+#define TIC(TAG_NAME)
+#define TOC(TAG_NAME)
+#define silk_TimerSave(FILE_NAME)
+
+#endif /* SILK_TIC_TOC */
+
+
+#if SILK_DEBUG
+/************************************/
+/* write data to file for debugging */
+/************************************/
+/* Example: DEBUG_STORE_DATA(testfile.pcm, &RIN[0], 160*sizeof(opus_int16)); */
+
+#define silk_NUM_STORES_MAX                                  100
+extern FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ];
+extern int silk_debug_store_count;
+
+/* Faster way of storing the data */
+#define DEBUG_STORE_DATA( FILE_NAME, DATA_PTR, N_BYTES ) {          \
+    static opus_int init = 0, cnt = 0;                              \
+    static FILE **fp;                                               \
+    if (init == 0) {                                                \
+        init = 1;                                                   \
+        cnt = silk_debug_store_count++;                             \
+        silk_debug_store_fp[ cnt ] = fopen(#FILE_NAME, "wb");       \
+    }                                                               \
+    fwrite((DATA_PTR), (N_BYTES), 1, silk_debug_store_fp[ cnt ]);   \
+}
+
+/* Call this at the end of main() */
+#define SILK_DEBUG_STORE_CLOSE_FILES {                              \
+    opus_int i;                                                     \
+    for( i = 0; i < silk_debug_store_count; i++ ) {                 \
+        fclose( silk_debug_store_fp[ i ] );                         \
+    }                                                               \
+}
+
+#else /* SILK_DEBUG */
+
+/* define macros as empty strings */
+#define DEBUG_STORE_DATA(FILE_NAME, DATA_PTR, N_BYTES)
+#define SILK_DEBUG_STORE_CLOSE_FILES
+
+#endif /* SILK_DEBUG */
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_DEBUG_H */
diff --git a/drivers/opus/silk/dec_API.c b/drivers/opus/silk/dec_API.c
new file mode 100644
index 0000000000..cd72115a20
--- /dev/null
+++ b/drivers/opus/silk/dec_API.c
@@ -0,0 +1,397 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#include "API.h"
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/************************/
+/* Decoder Super Struct */
+/************************/
+typedef struct {
+    silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
+    stereo_dec_state                sStereo;
+    opus_int                         nChannelsAPI;
+    opus_int                         nChannelsInternal;
+    opus_int                         prev_decode_only_middle;
+} silk_decoder;
+
+/*********************/
+/* Decoder functions */
+/*********************/
+
+opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+
+    *decSizeBytes = sizeof( silk_decoder );
+
+    return ret;
+}
+
+/* Reset decoder state */
+opus_int silk_InitDecoder(                              /* O    Returns error code                              */
+    void                            *decState           /* I/O  State                                           */
+)
+{
+    opus_int n, ret = SILK_NO_ERROR;
+    silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
+
+    for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
+        ret  = silk_init_decoder( &channel_state[ n ] );
+    }
+    silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
+    /* Not strictly needed, but it's cleaner that way */
+    ((silk_decoder *)decState)->prev_decode_only_middle = 0;
+
+    return ret;
+}
+
+/* Decode a frame */
+opus_int silk_Decode(                                   /* O    Returns error code                              */
+    void*                           decState,           /* I/O  State                                           */
+    silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
+    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
+    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
+    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
+    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
+)
+{
+    opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
+    opus_int32 nSamplesOutDec, LBRR_symbol;
+    opus_int16 *samplesOut1_tmp[ 2 ];
+    VARDECL( opus_int16, samplesOut1_tmp_storage );
+    VARDECL( opus_int16, samplesOut2_tmp );
+    opus_int32 MS_pred_Q13[ 2 ] = { 0 };
+    opus_int16 *resample_out_ptr;
+    silk_decoder *psDec = ( silk_decoder * )decState;
+    silk_decoder_state *channel_state = psDec->channel_state;
+    opus_int has_side;
+    opus_int stereo_to_mono;
+    SAVE_STACK;
+
+    silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
+
+    /**********************************/
+    /* Test if first frame in payload */
+    /**********************************/
+    if( newPacketFlag ) {
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
+        }
+    }
+
+    /* If Mono -> Stereo transition in bitstream: init state of second channel */
+    if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
+        ret += silk_init_decoder( &channel_state[ 1 ] );
+    }
+
+    stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
+                     ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
+
+    if( channel_state[ 0 ].nFramesDecoded == 0 ) {
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            opus_int fs_kHz_dec;
+            if( decControl->payloadSize_ms == 0 ) {
+                /* Assuming packet loss, use 10 ms */
+                channel_state[ n ].nFramesPerPacket = 1;
+                channel_state[ n ].nb_subfr = 2;
+            } else if( decControl->payloadSize_ms == 10 ) {
+                channel_state[ n ].nFramesPerPacket = 1;
+                channel_state[ n ].nb_subfr = 2;
+            } else if( decControl->payloadSize_ms == 20 ) {
+                channel_state[ n ].nFramesPerPacket = 1;
+                channel_state[ n ].nb_subfr = 4;
+            } else if( decControl->payloadSize_ms == 40 ) {
+                channel_state[ n ].nFramesPerPacket = 2;
+                channel_state[ n ].nb_subfr = 4;
+            } else if( decControl->payloadSize_ms == 60 ) {
+                channel_state[ n ].nFramesPerPacket = 3;
+                channel_state[ n ].nb_subfr = 4;
+            } else {
+                silk_assert( 0 );
+                RESTORE_STACK;
+                return SILK_DEC_INVALID_FRAME_SIZE;
+            }
+            fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
+            if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
+                silk_assert( 0 );
+                RESTORE_STACK;
+                return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+            }
+            ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
+        }
+    }
+
+    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
+        silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
+        silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
+        silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
+    }
+    psDec->nChannelsAPI      = decControl->nChannelsAPI;
+    psDec->nChannelsInternal = decControl->nChannelsInternal;
+
+    if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
+        ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
+        RESTORE_STACK;
+        return( ret );
+    }
+
+    if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
+        /* First decoder call for this payload */
+        /* Decode VAD flags and LBRR flag */
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
+                channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
+            }
+            channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
+        }
+        /* Decode LBRR flags */
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+            silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
+            if( channel_state[ n ].LBRR_flag ) {
+                if( channel_state[ n ].nFramesPerPacket == 1 ) {
+                    channel_state[ n ].LBRR_flags[ 0 ] = 1;
+                } else {
+                    LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
+                    for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
+                        channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
+                    }
+                }
+            }
+        }
+
+        if( lostFlag == FLAG_DECODE_NORMAL ) {
+            /* Regular decoding: skip all LBRR data */
+            for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
+                for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+                    if( channel_state[ n ].LBRR_flags[ i ] ) {
+                        opus_int pulses[ MAX_FRAME_LENGTH ];
+                        opus_int condCoding;
+
+                        if( decControl->nChannelsInternal == 2 && n == 0 ) {
+                            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
+                            if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
+                                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
+                            }
+                        }
+                        /* Use conditional coding if previous frame available */
+                        if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
+                            condCoding = CODE_CONDITIONALLY;
+                        } else {
+                            condCoding = CODE_INDEPENDENTLY;
+                        }
+                        silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
+                        silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
+                            channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
+                    }
+                }
+            }
+        }
+    }
+
+    /* Get MS predictor index */
+    if( decControl->nChannelsInternal == 2 ) {
+        if(   lostFlag == FLAG_DECODE_NORMAL ||
+            ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
+        {
+            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
+            /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
+            if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
+                ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
+            {
+                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
+            } else {
+                decode_only_middle = 0;
+            }
+        } else {
+            for( n = 0; n < 2; n++ ) {
+                MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
+            }
+        }
+    }
+
+    /* Reset side channel decoder prediction memory for first frame with side coding */
+    if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
+        silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
+        silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
+        psDec->channel_state[ 1 ].lagPrev        = 100;
+        psDec->channel_state[ 1 ].LastGainIndex  = 10;
+        psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
+        psDec->channel_state[ 1 ].first_frame_after_reset = 1;
+    }
+
+    ALLOC( samplesOut1_tmp_storage,
+           decControl->nChannelsInternal*(
+               channel_state[ 0 ].frame_length + 2 ),
+           opus_int16 );
+    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
+    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+                           + channel_state[ 0 ].frame_length + 2;
+
+    if( lostFlag == FLAG_DECODE_NORMAL ) {
+        has_side = !decode_only_middle;
+    } else {
+        has_side = !psDec->prev_decode_only_middle
+              || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
+    }
+    /* Call decoder for one frame */
+    for( n = 0; n < decControl->nChannelsInternal; n++ ) {
+        if( n == 0 || has_side ) {
+            opus_int FrameIndex;
+            opus_int condCoding;
+
+            FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
+            /* Use independent coding if no previous frame available */
+            if( FrameIndex <= 0 ) {
+                condCoding = CODE_INDEPENDENTLY;
+            } else if( lostFlag == FLAG_DECODE_LBRR ) {
+                condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
+            } else if( n > 0 && psDec->prev_decode_only_middle ) {
+                /* If we skipped a side frame in this packet, we don't
+                   need LTP scaling; the LTP state is well-defined. */
+                condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
+            } else {
+                condCoding = CODE_CONDITIONALLY;
+            }
+            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
+        } else {
+            silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
+        }
+        channel_state[ n ].nFramesDecoded++;
+    }
+
+    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
+        /* Convert Mid/Side to Left/Right */
+        silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
+    } else {
+        /* Buffering */
+        silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
+    }
+
+    /* Number of output samples */
+    *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
+
+    /* Set up pointers to temp buffers */
+    ALLOC( samplesOut2_tmp,
+           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
+    if( decControl->nChannelsAPI == 2 ) {
+        resample_out_ptr = samplesOut2_tmp;
+    } else {
+        resample_out_ptr = samplesOut;
+    }
+
+    for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
+
+        /* Resample decoded signal to API_sampleRate */
+        ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
+
+        /* Interleave if stereo output and stereo stream */
+        if( decControl->nChannelsAPI == 2 ) {
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
+            }
+        }
+    }
+
+    /* Create two channel output from mono stream */
+    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
+        if ( stereo_to_mono ){
+            /* Resample right channel for newly collapsed stereo just in case
+               we weren't doing collapsing when switching to mono */
+            ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
+
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
+            }
+        } else {
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
+            }
+        }
+    }
+
+    /* Export pitch lag, measured at 48 kHz sampling rate */
+    if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
+        int mult_tab[ 3 ] = { 6, 4, 3 };
+        decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
+    } else {
+        decControl->prevPitchLag = 0;
+    }
+
+    if( lostFlag == FLAG_PACKET_LOST ) {
+       /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
+          if we lose packets when the energy is going down */
+       for ( i = 0; i < psDec->nChannelsInternal; i++ )
+          psDec->channel_state[ i ].LastGainIndex = 10;
+    } else {
+       psDec->prev_decode_only_middle = decode_only_middle;
+    }
+    RESTORE_STACK;
+    return ret;
+}
+
+#if 0
+/* Getting table of contents for a packet */
+opus_int silk_get_TOC(
+    const opus_uint8                *payload,           /* I    Payload data                                */
+    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
+    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
+    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
+)
+{
+    opus_int i, flags, ret = SILK_NO_ERROR;
+
+    if( nBytesIn < 1 ) {
+        return -1;
+    }
+    if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
+        return -1;
+    }
+
+    silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
+
+    /* For stereo, extract the flags for the mid channel */
+    flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
+
+    Silk_TOC->inbandFECFlag = flags & 1;
+    for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
+        flags = silk_RSHIFT( flags, 1 );
+        Silk_TOC->VADFlags[ i ] = flags & 1;
+        Silk_TOC->VADFlag |= flags & 1;
+    }
+
+    return ret;
+}
+#endif
diff --git a/drivers/opus/silk/decode_core.c b/drivers/opus/silk/decode_core.c
new file mode 100644
index 0000000000..8f801ea7ad
--- /dev/null
+++ b/drivers/opus/silk/decode_core.c
@@ -0,0 +1,238 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/**********************************************************/
+/* Core decoder. Performs inverse NSQ operation LTP + LPC */
+/**********************************************************/
+void silk_decode_core(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    opus_int16                  xq[],                           /* O    Decoded speech                              */
+    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+)
+{
+    opus_int   i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
+    opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ];
+    VARDECL( opus_int16, sLTP );
+    VARDECL( opus_int32, sLTP_Q15 );
+    opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
+    opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
+    VARDECL( opus_int32, res_Q14 );
+    VARDECL( opus_int32, sLPC_Q14 );
+    SAVE_STACK;
+
+    silk_assert( psDec->prev_gain_Q16 != 0 );
+
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+    ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+    ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
+    ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
+
+    offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
+
+    if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) {
+        NLSF_interpolation_flag = 1;
+    } else {
+        NLSF_interpolation_flag = 0;
+    }
+
+    /* Decode excitation */
+    rand_seed = psDec->indices.Seed;
+    for( i = 0; i < psDec->frame_length; i++ ) {
+        rand_seed = silk_RAND( rand_seed );
+        psDec->exc_Q14[ i ] = silk_LSHIFT( (opus_int32)pulses[ i ], 14 );
+        if( psDec->exc_Q14[ i ] > 0 ) {
+            psDec->exc_Q14[ i ] -= QUANT_LEVEL_ADJUST_Q10 << 4;
+        } else
+        if( psDec->exc_Q14[ i ] < 0 ) {
+            psDec->exc_Q14[ i ] += QUANT_LEVEL_ADJUST_Q10 << 4;
+        }
+        psDec->exc_Q14[ i ] += offset_Q10 << 4;
+        if( rand_seed < 0 ) {
+           psDec->exc_Q14[ i ] = -psDec->exc_Q14[ i ];
+        }
+
+        rand_seed = silk_ADD32_ovflw( rand_seed, pulses[ i ] );
+    }
+
+    /* Copy LPC state */
+    silk_memcpy( sLPC_Q14, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+
+    pexc_Q14 = psDec->exc_Q14;
+    pxq      = xq;
+    sLTP_buf_idx = psDec->ltp_mem_length;
+    /* Loop over subframes */
+    for( k = 0; k < psDec->nb_subfr; k++ ) {
+        pres_Q14 = res_Q14;
+        A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ];
+
+        /* Preload LPC coeficients to array on stack. Gives small performance gain */
+        silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
+        B_Q14        = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
+        signalType   = psDec->indices.signalType;
+
+        Gain_Q10     = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
+        inv_gain_Q31 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 47 );
+
+        /* Calculate gain adjustment factor */
+        if( psDecCtrl->Gains_Q16[ k ] != psDec->prev_gain_Q16 ) {
+            gain_adj_Q16 =  silk_DIV32_varQ( psDec->prev_gain_Q16, psDecCtrl->Gains_Q16[ k ], 16 );
+
+            /* Scale short term state */
+            for( i = 0; i < MAX_LPC_ORDER; i++ ) {
+                sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, sLPC_Q14[ i ] );
+            }
+        } else {
+            gain_adj_Q16 = (opus_int32)1 << 16;
+        }
+
+        /* Save inv_gain */
+        silk_assert( inv_gain_Q31 != 0 );
+        psDec->prev_gain_Q16 = psDecCtrl->Gains_Q16[ k ];
+
+        /* Avoid abrupt transition from voiced PLC to unvoiced normal decoding */
+        if( psDec->lossCnt && psDec->prevSignalType == TYPE_VOICED &&
+            psDec->indices.signalType != TYPE_VOICED && k < MAX_NB_SUBFR/2 ) {
+
+            silk_memset( B_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) );
+            B_Q14[ LTP_ORDER/2 ] = SILK_FIX_CONST( 0.25, 14 );
+
+            signalType = TYPE_VOICED;
+            psDecCtrl->pitchL[ k ] = psDec->lagPrev;
+        }
+
+        if( signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = psDecCtrl->pitchL[ k ];
+
+            /* Re-whitening */
+            if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) {
+                /* Rewhiten with new A coefs */
+                start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                if( k == 2 ) {
+                    silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 ) );
+                }
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ],
+                    A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order );
+
+                /* After rewhitening the LTP state is unscaled */
+                if( k == 0 ) {
+                    /* Do LTP downscaling to reduce inter-packet dependency */
+                    inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, psDecCtrl->LTP_scale_Q14 ), 2 );
+                }
+                for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
+                    sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q31, sLTP[ psDec->ltp_mem_length - i - 1 ] );
+                }
+            } else {
+                /* Update LTP state when Gain changes */
+                if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
+                    for( i = 0; i < lag + LTP_ORDER/2; i++ ) {
+                        sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ sLTP_buf_idx - i - 1 ] );
+                    }
+                }
+            }
+        }
+
+        /* Long-term prediction */
+        if( signalType == TYPE_VOICED ) {
+            /* Set up pointer */
+            pred_lag_ptr = &sLTP_Q15[ sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+            for( i = 0; i < psDec->subfr_length; i++ ) {
+                /* Unrolled loop */
+                /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+                LTP_pred_Q13 = 2;
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[  0 ], B_Q14[ 0 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], B_Q14[ 1 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], B_Q14[ 2 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], B_Q14[ 3 ] );
+                LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], B_Q14[ 4 ] );
+                pred_lag_ptr++;
+
+                /* Generate LPC excitation */
+                pres_Q14[ i ] = silk_ADD_LSHIFT32( pexc_Q14[ i ], LTP_pred_Q13, 1 );
+
+                /* Update states */
+                sLTP_Q15[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q14[ i ], 1 );
+                sLTP_buf_idx++;
+            }
+        } else {
+            pres_Q14 = pexc_Q14;
+        }
+
+        for( i = 0; i < psDec->subfr_length; i++ ) {
+            /* Short-term prediction */
+            silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
+            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+            LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12_tmp[ 0 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12_tmp[ 1 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12_tmp[ 2 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12_tmp[ 3 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12_tmp[ 4 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12_tmp[ 5 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12_tmp[ 6 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12_tmp[ 7 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12_tmp[ 8 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] );
+            if( psDec->LPC_order == 16 ) {
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12_tmp[ 10 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] );
+            }
+
+            /* Add prediction to LPC excitation */
+            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 );
+
+            /* Scale with gain */
+            pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
+        }
+
+        /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */
+
+        /* Update LPC filter state */
+        silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        pexc_Q14 += psDec->subfr_length;
+        pxq      += psDec->subfr_length;
+    }
+
+    /* Save LPC state */
+    silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/decode_frame.c b/drivers/opus/silk/decode_frame.c
new file mode 100644
index 0000000000..38500227c2
--- /dev/null
+++ b/drivers/opus/silk/decode_frame.c
@@ -0,0 +1,128 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+#include "PLC.h"
+
+/****************/
+/* Decode frame */
+/****************/
+opus_int silk_decode_frame(
+    silk_decoder_state          *psDec,                         /* I/O  Pointer to Silk decoder state               */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int16                  pOut[],                         /* O    Pointer to output speech frame              */
+    opus_int32                  *pN,                            /* O    Pointer to size of output frame             */
+    opus_int                    lostFlag,                       /* I    0: no loss, 1 loss, 2 decode fec            */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    VARDECL( silk_decoder_control, psDecCtrl );
+    opus_int         L, mv_len, ret = 0;
+    VARDECL( opus_int, pulses );
+    SAVE_STACK;
+
+    L = psDec->frame_length;
+    ALLOC( psDecCtrl, 1, silk_decoder_control );
+    ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+                   ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
+    psDecCtrl->LTP_scale_Q14 = 0;
+
+    /* Safety checks */
+    silk_assert( L > 0 && L <= MAX_FRAME_LENGTH );
+
+    if(   lostFlag == FLAG_DECODE_NORMAL ||
+        ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
+    {
+        /*********************************************/
+        /* Decode quantization indices of side info  */
+        /*********************************************/
+        silk_decode_indices( psDec, psRangeDec, psDec->nFramesDecoded, lostFlag, condCoding );
+
+        /*********************************************/
+        /* Decode quantization indices of excitation */
+        /*********************************************/
+        silk_decode_pulses( psRangeDec, pulses, psDec->indices.signalType,
+                psDec->indices.quantOffsetType, psDec->frame_length );
+
+        /********************************************/
+        /* Decode parameters and pulse signal       */
+        /********************************************/
+        silk_decode_parameters( psDec, psDecCtrl, condCoding );
+
+        /********************************************************/
+        /* Run inverse NSQ                                      */
+        /********************************************************/
+        silk_decode_core( psDec, psDecCtrl, pOut, pulses );
+
+        /********************************************************/
+        /* Update PLC state                                     */
+        /********************************************************/
+        silk_PLC( psDec, psDecCtrl, pOut, 0 );
+
+        psDec->lossCnt = 0;
+        psDec->prevSignalType = psDec->indices.signalType;
+        silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 );
+
+        /* A frame has been decoded without errors */
+        psDec->first_frame_after_reset = 0;
+    } else {
+        /* Handle packet loss by extrapolation */
+        silk_PLC( psDec, psDecCtrl, pOut, 1 );
+    }
+
+    /*************************/
+    /* Update output buffer. */
+    /*************************/
+    silk_assert( psDec->ltp_mem_length >= psDec->frame_length );
+    mv_len = psDec->ltp_mem_length - psDec->frame_length;
+    silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
+    silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+
+    /****************************************************************/
+    /* Ensure smooth connection of extrapolated and good frames     */
+    /****************************************************************/
+    silk_PLC_glue_frames( psDec, pOut, L );
+
+    /************************************************/
+    /* Comfort noise generation / estimation        */
+    /************************************************/
+    silk_CNG( psDec, psDecCtrl, pOut, L );
+
+    /* Update some decoder state variables */
+    psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
+
+    /* Set output frame length */
+    *pN = L;
+
+    RESTORE_STACK;
+    return ret;
+}
diff --git a/drivers/opus/silk/decode_indices.c b/drivers/opus/silk/decode_indices.c
new file mode 100644
index 0000000000..c2aaad2606
--- /dev/null
+++ b/drivers/opus/silk/decode_indices.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Decode side-information parameters from payload */
+void silk_decode_indices(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    decode_LBRR,                    /* I    Flag indicating LBRR data is being decoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   i, k, Ix;
+    opus_int   decode_absolute_lagIndex, delta_lagIndex;
+    opus_int16 ec_ix[ MAX_LPC_ORDER ];
+    opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
+
+    /*******************************************/
+    /* Decode signal type and quantizer offset */
+    /*******************************************/
+    if( decode_LBRR || psDec->VAD_flags[ FrameIndex ] ) {
+        Ix = ec_dec_icdf( psRangeDec, silk_type_offset_VAD_iCDF, 8 ) + 2;
+    } else {
+        Ix = ec_dec_icdf( psRangeDec, silk_type_offset_no_VAD_iCDF, 8 );
+    }
+    psDec->indices.signalType      = (opus_int8)silk_RSHIFT( Ix, 1 );
+    psDec->indices.quantOffsetType = (opus_int8)( Ix & 1 );
+
+    /****************/
+    /* Decode gains */
+    /****************/
+    /* First subframe */
+    if( condCoding == CODE_CONDITIONALLY ) {
+        /* Conditional coding */
+        psDec->indices.GainsIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 );
+    } else {
+        /* Independent coding, in two stages: MSB bits followed by 3 LSBs */
+        psDec->indices.GainsIndices[ 0 ]  = (opus_int8)silk_LSHIFT( ec_dec_icdf( psRangeDec, silk_gain_iCDF[ psDec->indices.signalType ], 8 ), 3 );
+        psDec->indices.GainsIndices[ 0 ] += (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform8_iCDF, 8 );
+    }
+
+    /* Remaining subframes */
+    for( i = 1; i < psDec->nb_subfr; i++ ) {
+        psDec->indices.GainsIndices[ i ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 );
+    }
+
+    /**********************/
+    /* Decode LSF Indices */
+    /**********************/
+    psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 );
+    silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] );
+    silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order );
+    for( i = 0; i < psDec->psNLSF_CB->order; i++ ) {
+        Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+        if( Ix == 0 ) {
+            Ix -= ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
+        } else if( Ix == 2 * NLSF_QUANT_MAX_AMPLITUDE ) {
+            Ix += ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 );
+        }
+        psDec->indices.NLSFIndices[ i+1 ] = (opus_int8)( Ix - NLSF_QUANT_MAX_AMPLITUDE );
+    }
+
+    /* Decode LSF interpolation factor */
+    if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+        psDec->indices.NLSFInterpCoef_Q2 = (opus_int8)ec_dec_icdf( psRangeDec, silk_NLSF_interpolation_factor_iCDF, 8 );
+    } else {
+        psDec->indices.NLSFInterpCoef_Q2 = 4;
+    }
+
+    if( psDec->indices.signalType == TYPE_VOICED )
+    {
+        /*********************/
+        /* Decode pitch lags */
+        /*********************/
+        /* Get lag index */
+        decode_absolute_lagIndex = 1;
+        if( condCoding == CODE_CONDITIONALLY && psDec->ec_prevSignalType == TYPE_VOICED ) {
+            /* Decode Delta index */
+            delta_lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_delta_iCDF, 8 );
+            if( delta_lagIndex > 0 ) {
+                delta_lagIndex = delta_lagIndex - 9;
+                psDec->indices.lagIndex = (opus_int16)( psDec->ec_prevLagIndex + delta_lagIndex );
+                decode_absolute_lagIndex = 0;
+            }
+        }
+        if( decode_absolute_lagIndex ) {
+            /* Absolute decoding */
+            psDec->indices.lagIndex  = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_lag_iCDF, 8 ) * silk_RSHIFT( psDec->fs_kHz, 1 );
+            psDec->indices.lagIndex += (opus_int16)ec_dec_icdf( psRangeDec, psDec->pitch_lag_low_bits_iCDF, 8 );
+        }
+        psDec->ec_prevLagIndex = psDec->indices.lagIndex;
+
+        /* Get countour index */
+        psDec->indices.contourIndex = (opus_int8)ec_dec_icdf( psRangeDec, psDec->pitch_contour_iCDF, 8 );
+
+        /********************/
+        /* Decode LTP gains */
+        /********************/
+        /* Decode PERIndex value */
+        psDec->indices.PERIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_per_index_iCDF, 8 );
+
+        for( k = 0; k < psDec->nb_subfr; k++ ) {
+            psDec->indices.LTPIndex[ k ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_gain_iCDF_ptrs[ psDec->indices.PERIndex ], 8 );
+        }
+
+        /**********************/
+        /* Decode LTP scaling */
+        /**********************/
+        if( condCoding == CODE_INDEPENDENTLY ) {
+            psDec->indices.LTP_scaleIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTPscale_iCDF, 8 );
+        } else {
+            psDec->indices.LTP_scaleIndex = 0;
+        }
+    }
+    psDec->ec_prevSignalType = psDec->indices.signalType;
+
+    /***************/
+    /* Decode seed */
+    /***************/
+    psDec->indices.Seed = (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform4_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/decode_parameters.c b/drivers/opus/silk/decode_parameters.c
new file mode 100644
index 0000000000..72df4fcdb2
--- /dev/null
+++ b/drivers/opus/silk/decode_parameters.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Decode parameters from payload */
+void silk_decode_parameters(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   i, k, Ix;
+    opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], pNLSF0_Q15[ MAX_LPC_ORDER ];
+    const opus_int8 *cbk_ptr_Q7;
+
+    /* Dequant Gains */
+    silk_gains_dequant( psDecCtrl->Gains_Q16, psDec->indices.GainsIndices,
+        &psDec->LastGainIndex, condCoding == CODE_CONDITIONALLY, psDec->nb_subfr );
+
+    /****************/
+    /* Decode NLSFs */
+    /****************/
+    silk_NLSF_decode( pNLSF_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB );
+
+    /* Convert NLSF parameters to AR prediction filter coefficients */
+    silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order );
+
+    /* If just reset, e.g., because internal Fs changed, do not allow interpolation */
+    /* improves the case of packet loss in the first frame after a switch           */
+    if( psDec->first_frame_after_reset == 1 ) {
+        psDec->indices.NLSFInterpCoef_Q2 = 4;
+    }
+
+    if( psDec->indices.NLSFInterpCoef_Q2 < 4 ) {
+        /* Calculation of the interpolated NLSF0 vector from the interpolation factor, */
+        /* the previous NLSF1, and the current NLSF1                                   */
+        for( i = 0; i < psDec->LPC_order; i++ ) {
+            pNLSF0_Q15[ i ] = psDec->prevNLSF_Q15[ i ] + silk_RSHIFT( silk_MUL( psDec->indices.NLSFInterpCoef_Q2,
+                pNLSF_Q15[ i ] - psDec->prevNLSF_Q15[ i ] ), 2 );
+        }
+
+        /* Convert NLSF parameters to AR prediction filter coefficients */
+        silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order );
+    } else {
+        /* Copy LPC coefficients for first half from second half */
+        silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) );
+    }
+
+    silk_memcpy( psDec->prevNLSF_Q15, pNLSF_Q15, psDec->LPC_order * sizeof( opus_int16 ) );
+
+    /* After a packet loss do BWE of LPC coefs */
+    if( psDec->lossCnt ) {
+        silk_bwexpander( psDecCtrl->PredCoef_Q12[ 0 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 );
+        silk_bwexpander( psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 );
+    }
+
+    if( psDec->indices.signalType == TYPE_VOICED ) {
+        /*********************/
+        /* Decode pitch lags */
+        /*********************/
+
+        /* Decode pitch values */
+        silk_decode_pitch( psDec->indices.lagIndex, psDec->indices.contourIndex, psDecCtrl->pitchL, psDec->fs_kHz, psDec->nb_subfr );
+
+        /* Decode Codebook Index */
+        cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ psDec->indices.PERIndex ]; /* set pointer to start of codebook */
+
+        for( k = 0; k < psDec->nb_subfr; k++ ) {
+            Ix = psDec->indices.LTPIndex[ k ];
+            for( i = 0; i < LTP_ORDER; i++ ) {
+                psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER + i ] = silk_LSHIFT( cbk_ptr_Q7[ Ix * LTP_ORDER + i ], 7 );
+            }
+        }
+
+        /**********************/
+        /* Decode LTP scaling */
+        /**********************/
+        Ix = psDec->indices.LTP_scaleIndex;
+        psDecCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ Ix ];
+    } else {
+        silk_memset( psDecCtrl->pitchL,      0,             psDec->nb_subfr * sizeof( opus_int   ) );
+        silk_memset( psDecCtrl->LTPCoef_Q14, 0, LTP_ORDER * psDec->nb_subfr * sizeof( opus_int16 ) );
+        psDec->indices.PERIndex  = 0;
+        psDecCtrl->LTP_scale_Q14 = 0;
+    }
+}
diff --git a/drivers/opus/silk/decode_pitch.c b/drivers/opus/silk/decode_pitch.c
new file mode 100644
index 0000000000..3e1dd2d35b
--- /dev/null
+++ b/drivers/opus/silk/decode_pitch.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/***********************************************************
+* Pitch analyser function
+********************************************************** */
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+
+void silk_decode_pitch(
+    opus_int16                  lagIndex,           /* I                                                                */
+    opus_int8                   contourIndex,       /* O                                                                */
+    opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
+    const opus_int              Fs_kHz,             /* I    sampling frequency (kHz)                                    */
+    const opus_int              nb_subfr            /* I    number of sub frames                                        */
+)
+{
+    opus_int   lag, k, min_lag, max_lag, cbk_size;
+    const opus_int8 *Lag_CB_ptr;
+
+    if( Fs_kHz == 8 ) {
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE2_EXT;
+        } else {
+            silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
+            Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE2_10MS;
+        }
+    } else {
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE3_MAX;
+        } else {
+            silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 );
+            Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+            cbk_size   = PE_NB_CBKS_STAGE3_10MS;
+        }
+    }
+
+    min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz );
+    max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz );
+    lag = min_lag + lagIndex;
+
+    for( k = 0; k < nb_subfr; k++ ) {
+        pitch_lags[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, contourIndex, cbk_size );
+        pitch_lags[ k ] = silk_LIMIT( pitch_lags[ k ], min_lag, max_lag );
+    }
+}
diff --git a/drivers/opus/silk/decode_pulses.c b/drivers/opus/silk/decode_pulses.c
new file mode 100644
index 0000000000..13772f8a57
--- /dev/null
+++ b/drivers/opus/silk/decode_pulses.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/*********************************************/
+/* Decode quantization indices of excitation */
+/*********************************************/
+void silk_decode_pulses(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    const opus_int              signalType,                     /* I    Sigtype                                     */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+)
+{
+    opus_int   i, j, k, iter, abs_q, nLS, RateLevelIndex;
+    opus_int   sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
+    opus_int   *pulses_ptr;
+    const opus_uint8 *cdf_ptr;
+
+    /*********************/
+    /* Decode rate level */
+    /*********************/
+    RateLevelIndex = ec_dec_icdf( psRangeDec, silk_rate_levels_iCDF[ signalType >> 1 ], 8 );
+
+    /* Calculate number of shell blocks */
+    silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
+    iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) {
+        silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+        iter++;
+    }
+
+    /***************************************************/
+    /* Sum-Weighted-Pulses Decoding                    */
+    /***************************************************/
+    cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ];
+    for( i = 0; i < iter; i++ ) {
+        nLshifts[ i ] = 0;
+        sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 );
+
+        /* LSB indication */
+        while( sum_pulses[ i ] == MAX_PULSES + 1 ) {
+            nLshifts[ i ]++;
+            /* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */
+            sum_pulses[ i ] = ec_dec_icdf( psRangeDec,
+                    silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 );
+        }
+    }
+
+    /***************************************************/
+    /* Shell decoding                                  */
+    /***************************************************/
+    for( i = 0; i < iter; i++ ) {
+        if( sum_pulses[ i ] > 0 ) {
+            silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
+        } else {
+            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
+        }
+    }
+
+    /***************************************************/
+    /* LSB Decoding                                    */
+    /***************************************************/
+    for( i = 0; i < iter; i++ ) {
+        if( nLshifts[ i ] > 0 ) {
+            nLS = nLshifts[ i ];
+            pulses_ptr = &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ];
+            for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+                abs_q = pulses_ptr[ k ];
+                for( j = 0; j < nLS; j++ ) {
+                    abs_q = silk_LSHIFT( abs_q, 1 );
+                    abs_q += ec_dec_icdf( psRangeDec, silk_lsb_iCDF, 8 );
+                }
+                pulses_ptr[ k ] = abs_q;
+            }
+            /* Mark the number of pulses non-zero for sign decoding. */
+            sum_pulses[ i ] |= nLS << 5;
+        }
+    }
+
+    /****************************************/
+    /* Decode and add signs to pulse signal */
+    /****************************************/
+    silk_decode_signs( psRangeDec, pulses, frame_length, signalType, quantOffsetType, sum_pulses );
+}
diff --git a/drivers/opus/silk/decoder_set_fs.c b/drivers/opus/silk/decoder_set_fs.c
new file mode 100644
index 0000000000..6d2de56647
--- /dev/null
+++ b/drivers/opus/silk/decoder_set_fs.c
@@ -0,0 +1,108 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Set decoder sampling rate */
+opus_int silk_decoder_set_fs(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state pointer                       */
+    opus_int                    fs_kHz,                         /* I    Sampling frequency (kHz)                    */
+    opus_int32                  fs_API_Hz                       /* I    API Sampling frequency (Hz)                 */
+)
+{
+    opus_int frame_length, ret = 0;
+
+    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+    silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 );
+
+    /* New (sub)frame length */
+    psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz );
+    frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length );
+
+    /* Initialize resampler when switching internal or external sampling frequency */
+    if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) {
+        /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */
+        ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz, 0 );
+
+        psDec->fs_API_hz = fs_API_Hz;
+    }
+
+    if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) {
+        if( fs_kHz == 8 ) {
+            if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            }
+        } else {
+            if( psDec->nb_subfr == MAX_NB_SUBFR ) {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            } else {
+                psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        }
+        if( psDec->fs_kHz != fs_kHz ) {
+            psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+            if( fs_kHz == 8 || fs_kHz == 12 ) {
+                psDec->LPC_order = MIN_LPC_ORDER;
+                psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB;
+            } else {
+                psDec->LPC_order = MAX_LPC_ORDER;
+                psDec->psNLSF_CB = &silk_NLSF_CB_WB;
+            }
+            if( fs_kHz == 16 ) {
+                psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+            } else if( fs_kHz == 12 ) {
+                psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+            } else if( fs_kHz == 8 ) {
+                psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+            } else {
+                /* unsupported sampling rate */
+                silk_assert( 0 );
+            }
+            psDec->first_frame_after_reset = 1;
+            psDec->lagPrev                 = 100;
+            psDec->LastGainIndex           = 10;
+            psDec->prevSignalType          = TYPE_NO_VOICE_ACTIVITY;
+            silk_memset( psDec->outBuf, 0, sizeof(psDec->outBuf));
+            silk_memset( psDec->sLPC_Q14_buf, 0, sizeof(psDec->sLPC_Q14_buf) );
+        }
+
+        psDec->fs_kHz       = fs_kHz;
+        psDec->frame_length = frame_length;
+    }
+
+    /* Check that settings are valid */
+    silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH );
+
+    return ret;
+}
+
diff --git a/drivers/opus/silk/define.h b/drivers/opus/silk/define.h
new file mode 100644
index 0000000000..c47aca9f58
--- /dev/null
+++ b/drivers/opus/silk/define.h
@@ -0,0 +1,235 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_DEFINE_H
+#define SILK_DEFINE_H
+
+#include "errors.h"
+#include "typedef.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Max number of encoder channels (1/2) */
+#define ENCODER_NUM_CHANNELS                    2
+/* Number of decoder channels (1/2) */
+#define DECODER_NUM_CHANNELS                    2
+
+#define MAX_FRAMES_PER_PACKET                   3
+
+/* Limits on bitrate */
+#define MIN_TARGET_RATE_BPS                     5000
+#define MAX_TARGET_RATE_BPS                     80000
+#define TARGET_RATE_TAB_SZ                      8
+
+/* LBRR thresholds */
+#define LBRR_NB_MIN_RATE_BPS                    12000
+#define LBRR_MB_MIN_RATE_BPS                    14000
+#define LBRR_WB_MIN_RATE_BPS                    16000
+
+/* DTX settings */
+#define NB_SPEECH_FRAMES_BEFORE_DTX             10      /* eq 200 ms */
+#define MAX_CONSECUTIVE_DTX                     20      /* eq 400 ms */
+
+/* Maximum sampling frequency */
+#define MAX_FS_KHZ                              16
+#define MAX_API_FS_KHZ                          48
+
+/* Signal types */
+#define TYPE_NO_VOICE_ACTIVITY                  0
+#define TYPE_UNVOICED                           1
+#define TYPE_VOICED                             2
+
+/* Conditional coding types */
+#define CODE_INDEPENDENTLY                      0
+#define CODE_INDEPENDENTLY_NO_LTP_SCALING       1
+#define CODE_CONDITIONALLY                      2
+
+/* Settings for stereo processing */
+#define STEREO_QUANT_TAB_SIZE                   16
+#define STEREO_QUANT_SUB_STEPS                  5
+#define STEREO_INTERP_LEN_MS                    8       /* must be even */
+#define STEREO_RATIO_SMOOTH_COEF                0.01    /* smoothing coef for signal norms and stereo width */
+
+/* Range of pitch lag estimates */
+#define PITCH_EST_MIN_LAG_MS                    2       /* 2 ms -> 500 Hz */
+#define PITCH_EST_MAX_LAG_MS                    18      /* 18 ms -> 56 Hz */
+
+/* Maximum number of subframes */
+#define MAX_NB_SUBFR                            4
+
+/* Number of samples per frame */
+#define LTP_MEM_LENGTH_MS                       20
+#define SUB_FRAME_LENGTH_MS                     5
+#define MAX_SUB_FRAME_LENGTH                    ( SUB_FRAME_LENGTH_MS * MAX_FS_KHZ )
+#define MAX_FRAME_LENGTH_MS                     ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR )
+#define MAX_FRAME_LENGTH                        ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ )
+
+/* Milliseconds of lookahead for pitch analysis */
+#define LA_PITCH_MS                             2
+#define LA_PITCH_MAX                            ( LA_PITCH_MS * MAX_FS_KHZ )
+
+/* Order of LPC used in find pitch */
+#define MAX_FIND_PITCH_LPC_ORDER                16
+
+/* Length of LPC window used in find pitch */
+#define FIND_PITCH_LPC_WIN_MS                   ( 20 + (LA_PITCH_MS << 1) )
+#define FIND_PITCH_LPC_WIN_MS_2_SF              ( 10 + (LA_PITCH_MS << 1) )
+#define FIND_PITCH_LPC_WIN_MAX                  ( FIND_PITCH_LPC_WIN_MS * MAX_FS_KHZ )
+
+/* Milliseconds of lookahead for noise shape analysis */
+#define LA_SHAPE_MS                             5
+#define LA_SHAPE_MAX                            ( LA_SHAPE_MS * MAX_FS_KHZ )
+
+/* Maximum length of LPC window used in noise shape analysis */
+#define SHAPE_LPC_WIN_MAX                       ( 15 * MAX_FS_KHZ )
+
+/* dB level of lowest gain quantization level */
+#define MIN_QGAIN_DB                            2
+/* dB level of highest gain quantization level */
+#define MAX_QGAIN_DB                            88
+/* Number of gain quantization levels */
+#define N_LEVELS_QGAIN                          64
+/* Max increase in gain quantization index */
+#define MAX_DELTA_GAIN_QUANT                    36
+/* Max decrease in gain quantization index */
+#define MIN_DELTA_GAIN_QUANT                    -4
+
+/* Quantization offsets (multiples of 4) */
+#define OFFSET_VL_Q10                           32
+#define OFFSET_VH_Q10                           100
+#define OFFSET_UVL_Q10                          100
+#define OFFSET_UVH_Q10                          240
+
+#define QUANT_LEVEL_ADJUST_Q10                  80
+
+/* Maximum numbers of iterations used to stabilize an LPC vector */
+#define MAX_LPC_STABILIZE_ITERATIONS            16
+#define MAX_PREDICTION_POWER_GAIN               1e4f
+#define MAX_PREDICTION_POWER_GAIN_AFTER_RESET   1e2f
+
+#define MAX_LPC_ORDER                           16
+#define MIN_LPC_ORDER                           10
+
+/* Find Pred Coef defines */
+#define LTP_ORDER                               5
+
+/* LTP quantization settings */
+#define NB_LTP_CBKS                             3
+
+/* Flag to use harmonic noise shaping */
+#define USE_HARM_SHAPING                        1
+
+/* Max LPC order of noise shaping filters */
+#define MAX_SHAPE_LPC_ORDER                     16
+
+#define HARM_SHAPE_FIR_TAPS                     3
+
+/* Maximum number of delayed decision states */
+#define MAX_DEL_DEC_STATES                      4
+
+#define LTP_BUF_LENGTH                          512
+#define LTP_MASK                                ( LTP_BUF_LENGTH - 1 )
+
+#define DECISION_DELAY                          32
+#define DECISION_DELAY_MASK                     ( DECISION_DELAY - 1 )
+
+/* Number of subframes for excitation entropy coding */
+#define SHELL_CODEC_FRAME_LENGTH                16
+#define LOG2_SHELL_CODEC_FRAME_LENGTH           4
+#define MAX_NB_SHELL_BLOCKS                     ( MAX_FRAME_LENGTH / SHELL_CODEC_FRAME_LENGTH )
+
+/* Number of rate levels, for entropy coding of excitation */
+#define N_RATE_LEVELS                           10
+
+/* Maximum sum of pulses per shell coding frame */
+#define MAX_PULSES                              16
+
+#define MAX_MATRIX_SIZE                         MAX_LPC_ORDER /* Max of LPC Order and LTP order */
+
+#if( MAX_LPC_ORDER > DECISION_DELAY )
+# define NSQ_LPC_BUF_LENGTH                     MAX_LPC_ORDER
+#else
+# define NSQ_LPC_BUF_LENGTH                     DECISION_DELAY
+#endif
+
+/***************************/
+/* Voice activity detector */
+/***************************/
+#define VAD_N_BANDS                             4
+
+#define VAD_INTERNAL_SUBFRAMES_LOG2             2
+#define VAD_INTERNAL_SUBFRAMES                  ( 1 << VAD_INTERNAL_SUBFRAMES_LOG2 )
+
+#define VAD_NOISE_LEVEL_SMOOTH_COEF_Q16         1024    /* Must be <  4096 */
+#define VAD_NOISE_LEVELS_BIAS                   50
+
+/* Sigmoid settings */
+#define VAD_NEGATIVE_OFFSET_Q5                  128     /* sigmoid is 0 at -128 */
+#define VAD_SNR_FACTOR_Q16                      45000
+
+/* smoothing for SNR measurement */
+#define VAD_SNR_SMOOTH_COEF_Q18                 4096
+
+/* Size of the piecewise linear cosine approximation table for the LSFs */
+#define LSF_COS_TAB_SZ_FIX                      128
+
+/******************/
+/* NLSF quantizer */
+/******************/
+#define NLSF_W_Q                                2
+#define NLSF_VQ_MAX_VECTORS                     32
+#define NLSF_VQ_MAX_SURVIVORS                   32
+#define NLSF_QUANT_MAX_AMPLITUDE                4
+#define NLSF_QUANT_MAX_AMPLITUDE_EXT            10
+#define NLSF_QUANT_LEVEL_ADJ                    0.1
+#define NLSF_QUANT_DEL_DEC_STATES_LOG2          2
+#define NLSF_QUANT_DEL_DEC_STATES               ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 )
+
+/* Transition filtering for mode switching */
+#define TRANSITION_TIME_MS                      5120    /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/
+#define TRANSITION_NB                           3       /* Hardcoded in tables */
+#define TRANSITION_NA                           2       /* Hardcoded in tables */
+#define TRANSITION_INT_NUM                      5       /* Hardcoded in tables */
+#define TRANSITION_FRAMES                       ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS )
+#define TRANSITION_INT_STEPS                    ( TRANSITION_FRAMES  / ( TRANSITION_INT_NUM - 1 ) )
+
+/* BWE factors to apply after packet loss */
+#define BWE_AFTER_LOSS_Q16                      63570
+
+/* Defines for CN generation */
+#define CNG_BUF_MASK_MAX                        255     /* 2^floor(log2(MAX_FRAME_LENGTH))-1    */
+#define CNG_GAIN_SMTH_Q16                       4634    /* 0.25^(1/4)                           */
+#define CNG_NLSF_SMTH_Q16                       16348   /* 0.25                                 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/enc_API.c b/drivers/opus/silk/enc_API.c
new file mode 100644
index 0000000000..66a9bb67de
--- /dev/null
+++ b/drivers/opus/silk/enc_API.c
@@ -0,0 +1,556 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#include "define.h"
+#include "API.h"
+#include "control.h"
+#include "typedef.h"
+#include "stack_alloc.h"
+#include "structs.h"
+#include "tuning_parameters.h"
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+
+/***************************************/
+/* Read control structure from encoder */
+/***************************************/
+static opus_int silk_QueryEncoder(                      /* O    Returns error code                              */
+    const void                      *encState,          /* I    State                                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+);
+
+/****************************************/
+/* Encoder functions                    */
+/****************************************/
+
+opus_int silk_Get_Encoder_Size(                         /* O    Returns error code                              */
+    opus_int                        *encSizeBytes       /* O    Number of bytes in SILK encoder state           */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+
+    *encSizeBytes = sizeof( silk_encoder );
+
+    return ret;
+}
+
+/*************************/
+/* Init or Reset encoder */
+/*************************/
+opus_int silk_InitEncoder(                              /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    int                              arch,              /* I    Run-time architecture                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+)
+{
+    silk_encoder *psEnc;
+    opus_int n, ret = SILK_NO_ERROR;
+
+    psEnc = (silk_encoder *)encState;
+
+    /* Reset encoder */
+    silk_memset( psEnc, 0, sizeof( silk_encoder ) );
+    for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
+        if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
+            silk_assert( 0 );
+        }
+    }
+
+    psEnc->nChannelsAPI = 1;
+    psEnc->nChannelsInternal = 1;
+
+    /* Read control structure */
+    if( ret += silk_QueryEncoder( encState, encStatus ) ) {
+        silk_assert( 0 );
+    }
+
+    return ret;
+}
+
+/***************************************/
+/* Read control structure from encoder */
+/***************************************/
+static opus_int silk_QueryEncoder(                      /* O    Returns error code                              */
+    const void                      *encState,          /* I    State                                           */
+    silk_EncControlStruct           *encStatus          /* O    Encoder Status                                  */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+    silk_encoder_state_Fxx *state_Fxx;
+    silk_encoder *psEnc = (silk_encoder *)encState;
+
+    state_Fxx = psEnc->state_Fxx;
+
+    encStatus->nChannelsAPI              = psEnc->nChannelsAPI;
+    encStatus->nChannelsInternal         = psEnc->nChannelsInternal;
+    encStatus->API_sampleRate            = state_Fxx[ 0 ].sCmn.API_fs_Hz;
+    encStatus->maxInternalSampleRate     = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz;
+    encStatus->minInternalSampleRate     = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz;
+    encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz;
+    encStatus->payloadSize_ms            = state_Fxx[ 0 ].sCmn.PacketSize_ms;
+    encStatus->bitRate                   = state_Fxx[ 0 ].sCmn.TargetRate_bps;
+    encStatus->packetLossPercentage      = state_Fxx[ 0 ].sCmn.PacketLoss_perc;
+    encStatus->complexity                = state_Fxx[ 0 ].sCmn.Complexity;
+    encStatus->useInBandFEC              = state_Fxx[ 0 ].sCmn.useInBandFEC;
+    encStatus->useDTX                    = state_Fxx[ 0 ].sCmn.useDTX;
+    encStatus->useCBR                    = state_Fxx[ 0 ].sCmn.useCBR;
+    encStatus->internalSampleRate        = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
+    encStatus->allowBandwidthSwitch      = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch;
+    encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0;
+
+    return ret;
+}
+
+
+/**************************/
+/* Encode frame with Silk */
+/**************************/
+/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what                     */
+/* encControl->payloadSize_ms is set to                                                                         */
+opus_int silk_Encode(                                   /* O    Returns error code                              */
+    void                            *encState,          /* I/O  State                                           */
+    silk_EncControlStruct           *encControl,        /* I    Control status                                  */
+    const opus_int16                *samplesIn,         /* I    Speech sample input vector                      */
+    opus_int                        nSamplesIn,         /* I    Number of samples in input vector               */
+    ec_enc                          *psRangeEnc,        /* I/O  Compressor data structure                       */
+    opus_int32                      *nBytesOut,         /* I/O  Number of bytes in payload (input: Max bytes)   */
+    const opus_int                  prefillFlag         /* I    Flag to indicate prefilling buffers no coding   */
+)
+{
+    opus_int   n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
+    opus_int   nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
+    opus_int   nSamplesFromInput = 0, nSamplesFromInputMax;
+    opus_int   speech_act_thr_for_switch_Q8;
+    opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
+    silk_encoder *psEnc = ( silk_encoder * )encState;
+    VARDECL( opus_int16, buf );
+    opus_int transition, curr_block, tot_blocks;
+    SAVE_STACK;
+
+    if (encControl->reducedDependency)
+    {
+       psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
+       psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
+    }
+    psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
+
+    /* Check values in encoder control structure */
+    if( ( ret = check_control_input( encControl ) != 0 ) ) {
+        silk_assert( 0 );
+        RESTORE_STACK;
+        return ret;
+    }
+
+    encControl->switchReady = 0;
+
+    if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
+        /* Mono -> Stereo transition: init state of second channel and stereo state */
+        ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
+        silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
+        silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
+        psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
+        psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
+        psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
+        psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
+        psEnc->sStereo.width_prev_Q14 = 0;
+        psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
+        if( psEnc->nChannelsAPI == 2 ) {
+            silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
+            silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State,     &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State,     sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
+        }
+    }
+
+    transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);
+
+    psEnc->nChannelsAPI = encControl->nChannelsAPI;
+    psEnc->nChannelsInternal = encControl->nChannelsInternal;
+
+    nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
+    tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
+    curr_block = 0;
+    if( prefillFlag ) {
+        /* Only accept input length of 10 ms */
+        if( nBlocksOf10ms != 1 ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+        }
+        /* Reset Encoder */
+        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+            ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
+            silk_assert( !ret );
+        }
+        tmp_payloadSize_ms = encControl->payloadSize_ms;
+        encControl->payloadSize_ms = 10;
+        tmp_complexity = encControl->complexity;
+        encControl->complexity = 0;
+        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+            psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+            psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
+        }
+    } else {
+        /* Only accept input lengths that are a multiple of 10 ms */
+        if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+        }
+        /* Make sure no more than one packet can be produced */
+        if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
+        }
+    }
+
+    TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 );
+    for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+        /* Force the side channel to the same rate as the mid */
+        opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
+        if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
+            silk_assert( 0 );
+            RESTORE_STACK;
+            return ret;
+        }
+        if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
+            for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
+                psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
+            }
+        }
+        psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
+    }
+    silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
+
+    /* Input buffering/resampling and encoding */
+    nSamplesToBufferMax =
+        10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
+    nSamplesFromInputMax =
+        silk_DIV32_16( nSamplesToBufferMax *
+                           psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
+                       psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
+    ALLOC( buf, nSamplesFromInputMax, opus_int16 );
+    while( 1 ) {
+        nSamplesToBuffer  = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
+        nSamplesToBuffer  = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
+        nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
+        /* Resample and write to buffer */
+        if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
+            opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                buf[ n ] = samplesIn[ 2 * n ];
+            }
+            /* Making sure to start both resamplers from the same state when switching from mono to stereo */
+            if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
+               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
+            }
+
+            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+
+            nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
+            nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                buf[ n ] = samplesIn[ 2 * n + 1 ];
+            }
+            ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+
+            psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
+        } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
+            /* Combine left and right channels before resampling */
+            for( n = 0; n < nSamplesFromInput; n++ ) {
+                sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
+                buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum,  1 );
+            }
+            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            /* On the first mono frame, average the results for the two resampler states  */
+            if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
+               ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
+                   &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+               for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
+                  psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
+                        silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
+                                  + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
+               }
+            }
+            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+        } else {
+            silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
+            silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
+            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
+            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
+        }
+
+        samplesIn  += nSamplesFromInput * encControl->nChannelsAPI;
+        nSamplesIn -= nSamplesFromInput;
+
+        /* Default */
+        psEnc->allowBandwidthSwitch = 0;
+
+        /* Silk encoder */
+        if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
+            /* Enough data in input buffer, so encode */
+            silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+            silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
+
+            /* Deal with LBRR data */
+            if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
+                /* Create space at start of payload for VAD and FEC flags */
+                opus_uint8 iCDF[ 2 ] = { 0, 0 };
+                iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+                ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
+
+                /* Encode any LBRR data from previous packet */
+                /* Encode LBRR flags */
+                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                    LBRR_symbol = 0;
+                    for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
+                        LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
+                    }
+                    psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
+                    if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
+                        ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
+                    }
+                }
+
+                /* Code LBRR indices and excitation signals */
+                for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
+                    for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                        if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
+                            opus_int condCoding;
+
+                            if( encControl->nChannelsInternal == 2 && n == 0 ) {
+                                silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
+                                /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
+                                if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
+                                    silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
+                                }
+                            }
+                            /* Use conditional coding if previous frame available */
+                            if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
+                                condCoding = CODE_CONDITIONALLY;
+                            } else {
+                                condCoding = CODE_INDEPENDENTLY;
+                            }
+                            silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
+                            silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
+                                psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
+                        }
+                    }
+                }
+
+                /* Reset LBRR flags */
+                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                    silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
+                }
+            }
+
+            silk_HP_variable_cutoff( psEnc->state_Fxx );
+
+            /* Total target bits for packet */
+            nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
+            /* Subtract half of the bits already used */
+            if( !prefillFlag ) {
+                nBits -= ec_tell( psRangeEnc ) >> 1;
+            }
+            /* Divide by number of uncoded frames left in packet */
+            nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded );
+            /* Convert to bits/second */
+            if( encControl->payloadSize_ms == 10 ) {
+                TargetRate_bps = silk_SMULBB( nBits, 100 );
+            } else {
+                TargetRate_bps = silk_SMULBB( nBits, 50 );
+            }
+            /* Subtract fraction of bits in excess of target in previous packets */
+            TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
+            /* Never exceed input bitrate */
+            TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
+
+            /* Convert Left/Right to Mid/Side */
+            if( encControl->nChannelsInternal == 2 ) {
+                silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
+                    psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
+                    MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
+                    psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+                if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+                    /* Reset side channel encoder memory for first frame with side coding */
+                    if( psEnc->prev_decode_only_middle == 1 ) {
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sShape,               0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt,             0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ,            0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
+                        silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
+                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
+                        psEnc->state_Fxx[ 1 ].sCmn.prevLag                 = 100;
+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev            = 100;
+                        psEnc->state_Fxx[ 1 ].sShape.LastGainIndex         = 10;
+                        psEnc->state_Fxx[ 1 ].sCmn.prevSignalType          = TYPE_NO_VOICE_ACTIVITY;
+                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16      = 65536;
+                        psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
+                    }
+                    silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
+                } else {
+                    psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
+                }
+                if( !prefillFlag ) {
+                    silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+                    if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
+                        silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
+                    }
+                }
+            } else {
+                /* Buffering */
+                silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
+                silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
+            }
+            silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
+
+            /* Encode */
+            for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                opus_int maxBits, useCBR;
+
+                /* Handling rate constraints */
+                maxBits = encControl->maxBits;
+                if( tot_blocks == 2 && curr_block == 0 ) {
+                    maxBits = maxBits * 3 / 5;
+                } else if( tot_blocks == 3 ) {
+                    if( curr_block == 0 ) {
+                        maxBits = maxBits * 2 / 5;
+                    } else if( curr_block == 1 ) {
+                        maxBits = maxBits * 3 / 4;
+                    }
+                }
+                useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
+
+                if( encControl->nChannelsInternal == 1 ) {
+                    channelRate_bps = TargetRate_bps;
+                } else {
+                    channelRate_bps = MStargetRates_bps[ n ];
+                    if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
+                        useCBR = 0;
+                        /* Give mid up to 1/2 of the max bits for that frame */
+                        maxBits -= encControl->maxBits / ( tot_blocks * 2 );
+                    }
+                }
+
+                if( channelRate_bps > 0 ) {
+                    opus_int condCoding;
+
+                    silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
+
+                    /* Use independent coding if no previous frame available */
+                    if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
+                        condCoding = CODE_INDEPENDENTLY;
+                    } else if( n > 0 && psEnc->prev_decode_only_middle ) {
+                        /* If we skipped a side frame in this packet, we don't
+                           need LTP scaling; the LTP state is well-defined. */
+                        condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
+                    } else {
+                        condCoding = CODE_CONDITIONALLY;
+                    }
+                    if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
+                        silk_assert( 0 );
+                    }
+                }
+                psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+                psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
+                psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
+            }
+            psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
+
+            /* Insert VAD and FEC flags at beginning of bitstream */
+            if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
+                flags = 0;
+                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+                    for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
+                        flags  = silk_LSHIFT( flags, 1 );
+                        flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
+                    }
+                    flags  = silk_LSHIFT( flags, 1 );
+                    flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
+                }
+                if( !prefillFlag ) {
+                    ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
+                }
+
+                /* Return zero bytes if all channels DTXed */
+                if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
+                    *nBytesOut = 0;
+                }
+
+                psEnc->nBitsExceeded += *nBytesOut * 8;
+                psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
+                psEnc->nBitsExceeded  = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );
+
+                /* Update flag indicating if bandwidth switching is allowed */
+                speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ),
+                    SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
+                if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
+                    psEnc->allowBandwidthSwitch = 1;
+                    psEnc->timeSinceSwitchAllowed_ms = 0;
+                } else {
+                    psEnc->allowBandwidthSwitch = 0;
+                    psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
+                }
+            }
+
+            if( nSamplesIn == 0 ) {
+                break;
+            }
+        } else {
+            break;
+        }
+        curr_block++;
+    }
+
+    psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
+
+    encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
+    encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
+    encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
+    encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
+    if( prefillFlag ) {
+        encControl->payloadSize_ms = tmp_payloadSize_ms;
+        encControl->complexity = tmp_complexity;
+        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
+            psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
+            psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
+        }
+    }
+
+    RESTORE_STACK;
+    return ret;
+}
+
diff --git a/drivers/opus/silk/encode_indices.c b/drivers/opus/silk/encode_indices.c
new file mode 100644
index 0000000000..c6679b34f6
--- /dev/null
+++ b/drivers/opus/silk/encode_indices.c
@@ -0,0 +1,181 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Encode side-information parameters to payload */
+void silk_encode_indices(
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    encode_LBRR,                    /* I    Flag indicating LBRR data is being encoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   i, k, typeOffset;
+    opus_int   encode_absolute_lagIndex, delta_lagIndex;
+    opus_int16 ec_ix[ MAX_LPC_ORDER ];
+    opus_uint8 pred_Q8[ MAX_LPC_ORDER ];
+    const SideInfoIndices *psIndices;
+
+    if( encode_LBRR ) {
+         psIndices = &psEncC->indices_LBRR[ FrameIndex ];
+    } else {
+         psIndices = &psEncC->indices;
+    }
+
+    /*******************************************/
+    /* Encode signal type and quantizer offset */
+    /*******************************************/
+    typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType;
+    silk_assert( typeOffset >= 0 && typeOffset < 6 );
+    silk_assert( encode_LBRR == 0 || typeOffset >= 2 );
+    if( encode_LBRR || typeOffset >= 2 ) {
+        ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 );
+    } else {
+        ec_enc_icdf( psRangeEnc, typeOffset, silk_type_offset_no_VAD_iCDF, 8 );
+    }
+
+    /****************/
+    /* Encode gains */
+    /****************/
+    /* first subframe */
+    if( condCoding == CODE_CONDITIONALLY ) {
+        /* conditional coding */
+        silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
+        ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ], silk_delta_gain_iCDF, 8 );
+    } else {
+        /* independent coding, in two stages: MSB bits followed by 3 LSBs */
+        silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < N_LEVELS_QGAIN );
+        ec_enc_icdf( psRangeEnc, silk_RSHIFT( psIndices->GainsIndices[ 0 ], 3 ), silk_gain_iCDF[ psIndices->signalType ], 8 );
+        ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ] & 7, silk_uniform8_iCDF, 8 );
+    }
+
+    /* remaining subframes */
+    for( i = 1; i < psEncC->nb_subfr; i++ ) {
+        silk_assert( psIndices->GainsIndices[ i ] >= 0 && psIndices->GainsIndices[ i ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
+        ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ i ], silk_delta_gain_iCDF, 8 );
+    }
+
+    /****************/
+    /* Encode NLSFs */
+    /****************/
+    ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 );
+    silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] );
+    silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder );
+    for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) {
+        if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) {
+            ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+            ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
+        } else if( psIndices->NLSFIndices[ i+1 ] <= -NLSF_QUANT_MAX_AMPLITUDE ) {
+            ec_enc_icdf( psRangeEnc, 0, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+            ec_enc_icdf( psRangeEnc, -psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
+        } else {
+            ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] + NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 );
+        }
+    }
+
+    /* Encode NLSF interpolation factor */
+    if( psEncC->nb_subfr == MAX_NB_SUBFR ) {
+        silk_assert( psIndices->NLSFInterpCoef_Q2 >= 0 && psIndices->NLSFInterpCoef_Q2 < 5 );
+        ec_enc_icdf( psRangeEnc, psIndices->NLSFInterpCoef_Q2, silk_NLSF_interpolation_factor_iCDF, 8 );
+    }
+
+    if( psIndices->signalType == TYPE_VOICED )
+    {
+        /*********************/
+        /* Encode pitch lags */
+        /*********************/
+        /* lag index */
+        encode_absolute_lagIndex = 1;
+        if( condCoding == CODE_CONDITIONALLY && psEncC->ec_prevSignalType == TYPE_VOICED ) {
+            /* Delta Encoding */
+            delta_lagIndex = psIndices->lagIndex - psEncC->ec_prevLagIndex;
+            if( delta_lagIndex < -8 || delta_lagIndex > 11 ) {
+                delta_lagIndex = 0;
+            } else {
+                delta_lagIndex = delta_lagIndex + 9;
+                encode_absolute_lagIndex = 0; /* Only use delta */
+            }
+            silk_assert( delta_lagIndex >= 0 && delta_lagIndex < 21 );
+            ec_enc_icdf( psRangeEnc, delta_lagIndex, silk_pitch_delta_iCDF, 8 );
+        }
+        if( encode_absolute_lagIndex ) {
+            /* Absolute encoding */
+            opus_int32 pitch_high_bits, pitch_low_bits;
+            pitch_high_bits = silk_DIV32_16( psIndices->lagIndex, silk_RSHIFT( psEncC->fs_kHz, 1 ) );
+            pitch_low_bits = psIndices->lagIndex - silk_SMULBB( pitch_high_bits, silk_RSHIFT( psEncC->fs_kHz, 1 ) );
+            silk_assert( pitch_low_bits < psEncC->fs_kHz / 2 );
+            silk_assert( pitch_high_bits < 32 );
+            ec_enc_icdf( psRangeEnc, pitch_high_bits, silk_pitch_lag_iCDF, 8 );
+            ec_enc_icdf( psRangeEnc, pitch_low_bits, psEncC->pitch_lag_low_bits_iCDF, 8 );
+        }
+        psEncC->ec_prevLagIndex = psIndices->lagIndex;
+
+        /* Countour index */
+        silk_assert(   psIndices->contourIndex  >= 0 );
+        silk_assert( ( psIndices->contourIndex < 34 && psEncC->fs_kHz  > 8 && psEncC->nb_subfr == 4 ) ||
+                    ( psIndices->contourIndex < 11 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 4 ) ||
+                    ( psIndices->contourIndex < 12 && psEncC->fs_kHz  > 8 && psEncC->nb_subfr == 2 ) ||
+                    ( psIndices->contourIndex <  3 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 2 ) );
+        ec_enc_icdf( psRangeEnc, psIndices->contourIndex, psEncC->pitch_contour_iCDF, 8 );
+
+        /********************/
+        /* Encode LTP gains */
+        /********************/
+        /* PERIndex value */
+        silk_assert( psIndices->PERIndex >= 0 && psIndices->PERIndex < 3 );
+        ec_enc_icdf( psRangeEnc, psIndices->PERIndex, silk_LTP_per_index_iCDF, 8 );
+
+        /* Codebook Indices */
+        for( k = 0; k < psEncC->nb_subfr; k++ ) {
+            silk_assert( psIndices->LTPIndex[ k ] >= 0 && psIndices->LTPIndex[ k ] < ( 8 << psIndices->PERIndex ) );
+            ec_enc_icdf( psRangeEnc, psIndices->LTPIndex[ k ], silk_LTP_gain_iCDF_ptrs[ psIndices->PERIndex ], 8 );
+        }
+
+        /**********************/
+        /* Encode LTP scaling */
+        /**********************/
+        if( condCoding == CODE_INDEPENDENTLY ) {
+            silk_assert( psIndices->LTP_scaleIndex >= 0 && psIndices->LTP_scaleIndex < 3 );
+            ec_enc_icdf( psRangeEnc, psIndices->LTP_scaleIndex, silk_LTPscale_iCDF, 8 );
+        }
+        silk_assert( !condCoding || psIndices->LTP_scaleIndex == 0 );
+    }
+
+    psEncC->ec_prevSignalType = psIndices->signalType;
+
+    /***************/
+    /* Encode seed */
+    /***************/
+    silk_assert( psIndices->Seed >= 0 && psIndices->Seed < 4 );
+    ec_enc_icdf( psRangeEnc, psIndices->Seed, silk_uniform4_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/encode_pulses.c b/drivers/opus/silk/encode_pulses.c
new file mode 100644
index 0000000000..d148b9d1e6
--- /dev/null
+++ b/drivers/opus/silk/encode_pulses.c
@@ -0,0 +1,206 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/*********************************************/
+/* Encode quantization indices of excitation */
+/*********************************************/
+
+static OPUS_INLINE opus_int combine_and_check(    /* return ok                           */
+    opus_int         *pulses_comb,           /* O                                   */
+    const opus_int   *pulses_in,             /* I                                   */
+    opus_int         max_pulses,             /* I    max value for sum of pulses    */
+    opus_int         len                     /* I    number of output values        */
+)
+{
+    opus_int k, sum;
+
+    for( k = 0; k < len; k++ ) {
+        sum = pulses_in[ 2 * k ] + pulses_in[ 2 * k + 1 ];
+        if( sum > max_pulses ) {
+            return 1;
+        }
+        pulses_comb[ k ] = sum;
+    }
+
+    return 0;
+}
+
+/* Encode quantization indices of excitation */
+void silk_encode_pulses(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              signalType,                     /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    opus_int8                   pulses[],                       /* I    quantization indices                        */
+    const opus_int              frame_length                    /* I    Frame length                                */
+)
+{
+    opus_int   i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0;
+    opus_int32 abs_q, minSumBits_Q5, sumBits_Q5;
+    VARDECL( opus_int, abs_pulses );
+    VARDECL( opus_int, sum_pulses );
+    VARDECL( opus_int, nRshifts );
+    opus_int   pulses_comb[ 8 ];
+    opus_int   *abs_pulses_ptr;
+    const opus_int8 *pulses_ptr;
+    const opus_uint8 *cdf_ptr;
+    const opus_uint8 *nBits_ptr;
+    SAVE_STACK;
+
+    silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/
+
+    /****************************/
+    /* Prepare for shell coding */
+    /****************************/
+    /* Calculate number of shell blocks */
+    silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH );
+    iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH );
+    if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) {
+        silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */
+        iter++;
+        silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8));
+    }
+
+    /* Take the absolute value of the pulses */
+    ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int );
+    silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) );
+    for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) {
+        abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] );
+        abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] );
+        abs_pulses[i+2] = ( opus_int )silk_abs( pulses[ i + 2 ] );
+        abs_pulses[i+3] = ( opus_int )silk_abs( pulses[ i + 3 ] );
+    }
+
+    /* Calc sum pulses per shell code frame */
+    ALLOC( sum_pulses, iter, opus_int );
+    ALLOC( nRshifts, iter, opus_int );
+    abs_pulses_ptr = abs_pulses;
+    for( i = 0; i < iter; i++ ) {
+        nRshifts[ i ] = 0;
+
+        while( 1 ) {
+            /* 1+1 -> 2 */
+            scale_down = combine_and_check( pulses_comb, abs_pulses_ptr, silk_max_pulses_table[ 0 ], 8 );
+            /* 2+2 -> 4 */
+            scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 1 ], 4 );
+            /* 4+4 -> 8 */
+            scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 2 ], 2 );
+            /* 8+8 -> 16 */
+            scale_down += combine_and_check( &sum_pulses[ i ], pulses_comb, silk_max_pulses_table[ 3 ], 1 );
+
+            if( scale_down ) {
+                /* We need to downscale the quantization signal */
+                nRshifts[ i ]++;
+                for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+                    abs_pulses_ptr[ k ] = silk_RSHIFT( abs_pulses_ptr[ k ], 1 );
+                }
+            } else {
+                /* Jump out of while(1) loop and go to next shell coding frame */
+                break;
+            }
+        }
+        abs_pulses_ptr += SHELL_CODEC_FRAME_LENGTH;
+    }
+
+    /**************/
+    /* Rate level */
+    /**************/
+    /* find rate level that leads to fewest bits for coding of pulses per block info */
+    minSumBits_Q5 = silk_int32_MAX;
+    for( k = 0; k < N_RATE_LEVELS - 1; k++ ) {
+        nBits_ptr  = silk_pulses_per_block_BITS_Q5[ k ];
+        sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ];
+        for( i = 0; i < iter; i++ ) {
+            if( nRshifts[ i ] > 0 ) {
+                sumBits_Q5 += nBits_ptr[ MAX_PULSES + 1 ];
+            } else {
+                sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ];
+            }
+        }
+        if( sumBits_Q5 < minSumBits_Q5 ) {
+            minSumBits_Q5 = sumBits_Q5;
+            RateLevelIndex = k;
+        }
+    }
+    ec_enc_icdf( psRangeEnc, RateLevelIndex, silk_rate_levels_iCDF[ signalType >> 1 ], 8 );
+
+    /***************************************************/
+    /* Sum-Weighted-Pulses Encoding                    */
+    /***************************************************/
+    cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ];
+    for( i = 0; i < iter; i++ ) {
+        if( nRshifts[ i ] == 0 ) {
+            ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 );
+        } else {
+            ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, cdf_ptr, 8 );
+            for( k = 0; k < nRshifts[ i ] - 1; k++ ) {
+                ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
+            }
+            ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 );
+        }
+    }
+
+    /******************/
+    /* Shell Encoding */
+    /******************/
+    for( i = 0; i < iter; i++ ) {
+        if( sum_pulses[ i ] > 0 ) {
+            silk_shell_encoder( psRangeEnc, &abs_pulses[ i * SHELL_CODEC_FRAME_LENGTH ] );
+        }
+    }
+
+    /****************/
+    /* LSB Encoding */
+    /****************/
+    for( i = 0; i < iter; i++ ) {
+        if( nRshifts[ i ] > 0 ) {
+            pulses_ptr = &pulses[ i * SHELL_CODEC_FRAME_LENGTH ];
+            nLS = nRshifts[ i ] - 1;
+            for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) {
+                abs_q = (opus_int8)silk_abs( pulses_ptr[ k ] );
+                for( j = nLS; j > 0; j-- ) {
+                    bit = silk_RSHIFT( abs_q, j ) & 1;
+                    ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 );
+                }
+                bit = abs_q & 1;
+                ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 );
+            }
+        }
+    }
+
+    /****************/
+    /* Encode signs */
+    /****************/
+    silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses );
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/errors.h b/drivers/opus/silk/errors.h
new file mode 100644
index 0000000000..45070800f2
--- /dev/null
+++ b/drivers/opus/silk/errors.h
@@ -0,0 +1,98 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_ERRORS_H
+#define SILK_ERRORS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/******************/
+/* Error messages */
+/******************/
+#define SILK_NO_ERROR                               0
+
+/**************************/
+/* Encoder error messages */
+/**************************/
+
+/* Input length is not a multiple of 10 ms, or length is longer than the packet length */
+#define SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES        -101
+
+/* Sampling frequency not 8000, 12000 or 16000 Hertz */
+#define SILK_ENC_FS_NOT_SUPPORTED                   -102
+
+/* Packet size not 10, 20, 40, or 60 ms */
+#define SILK_ENC_PACKET_SIZE_NOT_SUPPORTED          -103
+
+/* Allocated payload buffer too short */
+#define SILK_ENC_PAYLOAD_BUF_TOO_SHORT              -104
+
+/* Loss rate not between 0 and 100 percent */
+#define SILK_ENC_INVALID_LOSS_RATE                  -105
+
+/* Complexity setting not valid, use 0...10 */
+#define SILK_ENC_INVALID_COMPLEXITY_SETTING         -106
+
+/* Inband FEC setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_INBAND_FEC_SETTING         -107
+
+/* DTX setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_DTX_SETTING                -108
+
+/* CBR setting not valid, use 0 or 1 */
+#define SILK_ENC_INVALID_CBR_SETTING                -109
+
+/* Internal encoder error */
+#define SILK_ENC_INTERNAL_ERROR                     -110
+
+/* Internal encoder error */
+#define SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR   -111
+
+/**************************/
+/* Decoder error messages */
+/**************************/
+
+/* Output sampling frequency lower than internal decoded sampling frequency */
+#define SILK_DEC_INVALID_SAMPLING_FREQUENCY         -200
+
+/* Payload size exceeded the maximum allowed 1024 bytes */
+#define SILK_DEC_PAYLOAD_TOO_LARGE                  -201
+
+/* Payload has bit errors */
+#define SILK_DEC_PAYLOAD_ERROR                      -202
+
+/* Payload has bit errors */
+#define SILK_DEC_INVALID_FRAME_SIZE                 -203
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
new file mode 100644
index 0000000000..1df4b01d20
--- /dev/null
+++ b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+void silk_LTP_analysis_filter_FIX(
+    opus_int16                      *LTP_res,                               /* O    LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length )  */
+    const opus_int16                *x,                                     /* I    Pointer to input signal with at least max( pitchL ) preceding samples       */
+    const opus_int16                LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I    LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe                   */
+    const opus_int                  pitchL[ MAX_NB_SUBFR ],                 /* I    Pitch lag, one for each subframe                                            */
+    const opus_int32                invGains_Q16[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains, one for each subframe                           */
+    const opus_int                  subfr_length,                           /* I    Length of each subframe                                                     */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  pre_length                              /* I    Length of the preceding samples starting at &x[0] for each subframe         */
+)
+{
+    const opus_int16 *x_ptr, *x_lag_ptr;
+    opus_int16   Btmp_Q14[ LTP_ORDER ];
+    opus_int16   *LTP_res_ptr;
+    opus_int     k, i, j;
+    opus_int32   LTP_est;
+
+    x_ptr = x;
+    LTP_res_ptr = LTP_res;
+    for( k = 0; k < nb_subfr; k++ ) {
+
+        x_lag_ptr = x_ptr - pitchL[ k ];
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ];
+        }
+
+        /* LTP analysis FIR filter */
+        for( i = 0; i < subfr_length + pre_length; i++ ) {
+            LTP_res_ptr[ i ] = x_ptr[ i ];
+
+            /* Long-term prediction */
+            LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
+            for( j = 1; j < LTP_ORDER; j++ ) {
+                LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
+            }
+            LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/
+
+            /* Subtract long-term prediction */
+            LTP_res_ptr[ i ] = (opus_int16)silk_SAT16( (opus_int32)x_ptr[ i ] - LTP_est );
+
+            /* Scale residual */
+            LTP_res_ptr[ i ] = silk_SMULWB( invGains_Q16[ k ], LTP_res_ptr[ i ] );
+
+            x_lag_ptr++;
+        }
+
+        /* Update pointers */
+        LTP_res_ptr += subfr_length + pre_length;
+        x_ptr       += subfr_length;
+    }
+}
+
diff --git a/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
new file mode 100644
index 0000000000..ab6923c5c9
--- /dev/null
+++ b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+)
+{
+    opus_int round_loss;
+
+    if( condCoding == CODE_INDEPENDENTLY ) {
+        /* Only scale if first frame in packet */
+        round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket;
+        psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT(
+            silk_SMULWB( silk_SMULBB( round_loss, psEncCtrl->LTPredCodGain_Q7 ), SILK_FIX_CONST( 0.1, 9 ) ), 0, 2 );
+    } else {
+        /* Default is minimum scaling */
+        psEnc->sCmn.indices.LTP_scaleIndex = 0;
+    }
+    psEncCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ];
+}
diff --git a/drivers/opus/silk/fixed/apply_sine_window_FIX.c b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
new file mode 100644
index 0000000000..0998b49eca
--- /dev/null
+++ b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
@@ -0,0 +1,101 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Apply sine window to signal vector.                                      */
+/* Window types:                                                            */
+/*    1 -> sine window from 0 to pi/2                                       */
+/*    2 -> sine window from pi/2 to pi                                      */
+/* Every other sample is linearly interpolated, for speed.                  */
+/* Window length must be between 16 and 120 (incl) and a multiple of 4.     */
+
+/* Matlab code for table:
+   for k=16:9*4:16+2*9*4, fprintf(' %7.d,', -round(65536*pi ./ (k:4:k+8*4))); fprintf('\n'); end
+*/
+static const opus_int16 freq_table_Q16[ 27 ] = {
+   12111,    9804,    8235,    7100,    6239,    5565,    5022,    4575,    4202,
+    3885,    3612,    3375,    3167,    2984,    2820,    2674,    2542,    2422,
+    2313,    2214,    2123,    2038,    1961,    1889,    1822,    1760,    1702,
+};
+
+void silk_apply_sine_window(
+    opus_int16                  px_win[],           /* O    Pointer to windowed signal                                  */
+    const opus_int16            px[],               /* I    Pointer to input signal                                     */
+    const opus_int              win_type,           /* I    Selects a window type                                       */
+    const opus_int              length              /* I    Window length, multiple of 4                                */
+)
+{
+    opus_int   k, f_Q16, c_Q16;
+    opus_int32 S0_Q16, S1_Q16;
+
+    silk_assert( win_type == 1 || win_type == 2 );
+
+    /* Length must be in a range from 16 to 120 and a multiple of 4 */
+    silk_assert( length >= 16 && length <= 120 );
+    silk_assert( ( length & 3 ) == 0 );
+
+    /* Frequency */
+    k = ( length >> 2 ) - 4;
+    silk_assert( k >= 0 && k <= 26 );
+    f_Q16 = (opus_int)freq_table_Q16[ k ];
+
+    /* Factor used for cosine approximation */
+    c_Q16 = silk_SMULWB( (opus_int32)f_Q16, -f_Q16 );
+    silk_assert( c_Q16 >= -32768 );
+
+    /* initialize state */
+    if( win_type == 1 ) {
+        /* start from 0 */
+        S0_Q16 = 0;
+        /* approximation of sin(f) */
+        S1_Q16 = f_Q16 + silk_RSHIFT( length, 3 );
+    } else {
+        /* start from 1 */
+        S0_Q16 = ( (opus_int32)1 << 16 );
+        /* approximation of cos(f) */
+        S1_Q16 = ( (opus_int32)1 << 16 ) + silk_RSHIFT( c_Q16, 1 ) + silk_RSHIFT( length, 4 );
+    }
+
+    /* Uses the recursive equation:   sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f)    */
+    /* 4 samples at a time */
+    for( k = 0; k < length; k += 4 ) {
+        px_win[ k ]     = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k ] );
+        px_win[ k + 1 ] = (opus_int16)silk_SMULWB( S1_Q16, px[ k + 1] );
+        S0_Q16 = silk_SMULWB( S1_Q16, c_Q16 ) + silk_LSHIFT( S1_Q16, 1 ) - S0_Q16 + 1;
+        S0_Q16 = silk_min( S0_Q16, ( (opus_int32)1 << 16 ) );
+
+        px_win[ k + 2 ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k + 2] );
+        px_win[ k + 3 ] = (opus_int16)silk_SMULWB( S0_Q16, px[ k + 3 ] );
+        S1_Q16 = silk_SMULWB( S0_Q16, c_Q16 ) + silk_LSHIFT( S0_Q16, 1 ) - S1_Q16;
+        S1_Q16 = silk_min( S1_Q16, ( (opus_int32)1 << 16 ) );
+    }
+}
diff --git a/drivers/opus/silk/fixed/autocorr_FIX.c b/drivers/opus/silk/fixed/autocorr_FIX.c
new file mode 100644
index 0000000000..438b42f85b
--- /dev/null
+++ b/drivers/opus/silk/fixed/autocorr_FIX.c
@@ -0,0 +1,48 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "celt_lpc.h"
+
+/* Compute autocorrelation */
+void silk_autocorr(
+    opus_int32                  *results,           /* O    Result (length correlationCount)                            */
+    opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
+    const opus_int16            *inputData,         /* I    Input data to correlate                                     */
+    const opus_int              inputDataSize,      /* I    Length of input                                             */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int   corrCount;
+    corrCount = silk_min_int( inputDataSize, correlationCount );
+    *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch);
+}
diff --git a/drivers/opus/silk/fixed/burg_modified_FIX.c b/drivers/opus/silk/fixed/burg_modified_FIX.c
new file mode 100644
index 0000000000..ce2a560e6d
--- /dev/null
+++ b/drivers/opus/silk/fixed/burg_modified_FIX.c
@@ -0,0 +1,279 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "tuning_parameters.h"
+#include "pitch.h"
+
+#define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+
+#define QA                          25
+#define N_BITS_HEAD_ROOM            2
+#define MIN_RSHIFTS                 -16
+#define MAX_RSHIFTS                 (32 - QA)
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+    opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
+    const opus_int16 *x_ptr;
+    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
+    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
+    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
+    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
+
+    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
+    if( rshifts > MAX_RSHIFTS ) {
+        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
+        silk_assert( C0 > 0 );
+        rshifts = MAX_RSHIFTS;
+    } else {
+        lz = silk_CLZ32( C0 ) - 1;
+        rshifts_extra = N_BITS_HEAD_ROOM - lz;
+        if( rshifts_extra > 0 ) {
+            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
+            C0 = silk_RSHIFT32( C0, rshifts_extra );
+        } else {
+            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
+            C0 = silk_LSHIFT32( C0, -rshifts_extra );
+        }
+        rshifts += rshifts_extra;
+    }
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+    if( rshifts > 0 ) {
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
+                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
+            }
+        }
+    } else {
+        for( s = 0; s < nb_subfr; s++ ) {
+            int i;
+            opus_int32 d;
+            x_ptr = x + s * subfr_length;
+            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
+            for( n = 1; n < D + 1; n++ ) {
+               for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
+                  d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
+               xcorr[ n - 1 ] += d;
+            }
+            for( n = 1; n < D + 1; n++ ) {
+                C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
+            }
+        }
+    }
+    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+
+    /* Initialize */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
+
+    invGain_Q30 = (opus_int32)1 << 30;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        if( rshifts > -2 ) {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
+                for( k = 0; k < n; k++ ) {
+                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp_QA = Af_QA[ k ];
+                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
+                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
+                }
+                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
+                for( k = 0; k <= n; k++ ) {
+                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
+                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
+                }
+            }
+        } else {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
+                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
+                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
+                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
+                for( k = 0; k < n; k++ ) {
+                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
+                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
+                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
+                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
+                }
+                tmp1 = -tmp1;                                                                           /* Q17 */
+                tmp2 = -tmp2;                                                                           /* Q17 */
+                for( k = 0; k <= n; k++ ) {
+                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
+                        silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
+                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
+                        silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+                }
+            }
+        }
+
+        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
+        tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
+        tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
+        num  = 0;                                                                                       /* Q( -rshifts ) */
+        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
+        for( k = 0; k < n; k++ ) {
+            Atmp_QA = Af_QA[ k ];
+            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
+            lz = silk_min( 32 - QA, lz );
+            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
+
+            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
+            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
+                                                                                Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
+        }
+        CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
+        CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
+        num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
+        num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
+
+        /* Calculate the next order reflection (parcor) coefficient */
+        if( silk_abs( num ) < nrg ) {
+            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
+        } else {
+            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
+        }
+
+        /* Update inverse prediction gain */
+        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
+        if( tmp1 <= minInvGain_Q30 ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
+            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
+            /* Newton-Raphson iteration */
+            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
+            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
+            if( num < 0 ) {
+                /* Ensure adjusted reflection coefficients has the original sign */
+                rc_Q31 = -rc_Q31;
+            }
+            invGain_Q30 = minInvGain_Q30;
+            reached_max_gain = 1;
+        } else {
+            invGain_Q30 = tmp1;
+        }
+
+        /* Update the AR coefficients */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af_QA[ k ];                                                                  /* QA */
+            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
+            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
+            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
+        }
+        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af_QA[ k ] = 0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
+            tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
+            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
+        }
+    }
+
+    if( reached_max_gain ) {
+        for( k = 0; k < D; k++ ) {
+            /* Scale coefficients */
+            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        if( rshifts > 0 ) {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
+            }
+        } else {
+            for( s = 0; s < nb_subfr; s++ ) {
+                x_ptr = x + s * subfr_length;
+                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
+            }
+        }
+        /* Approximate residual energy */
+        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
+        *res_nrg_Q = -rshifts;
+    } else {
+        /* Return residual energy */
+        nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
+        tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
+        for( k = 0; k < D; k++ ) {
+            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
+            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
+            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
+            A_Q16[ k ] = -Atmp1;
+        }
+        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
+        *res_nrg_Q = -rshifts;
+    }
+}
diff --git a/drivers/opus/silk/fixed/corrMatrix_FIX.c b/drivers/opus/silk/fixed/corrMatrix_FIX.c
new file mode 100644
index 0000000000..28543fc204
--- /dev/null
+++ b/drivers/opus/silk/fixed/corrMatrix_FIX.c
@@ -0,0 +1,156 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/**********************************************************************
+ * Correlation Matrix Computations for LS estimate.
+ **********************************************************************/
+
+#include "main_FIX.h"
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int16                *t,                                     /* I    Target vector [L]                                                           */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    opus_int32                      *Xt,                                    /* O    Pointer to X'*t correlation vector [order]                                  */
+    const opus_int                  rshifts                                 /* I    Right shifts of correlations                                                */
+)
+{
+    opus_int         lag, i;
+    const opus_int16 *ptr1, *ptr2;
+    opus_int32       inner_prod;
+
+    ptr1 = &x[ order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */
+    ptr2 = t;
+    /* Calculate X'*t */
+    if( rshifts > 0 ) {
+        /* Right shifting used */
+        for( lag = 0; lag < order; lag++ ) {
+            inner_prod = 0;
+            for( i = 0; i < L; i++ ) {
+                inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts );
+            }
+            Xt[ lag ] = inner_prod; /* X[:,lag]'*t */
+            ptr1--; /* Go to next column of X */
+        }
+    } else {
+        silk_assert( rshifts == 0 );
+        for( lag = 0; lag < order; lag++ ) {
+            Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */
+            ptr1--; /* Go to next column of X */
+        }
+    }
+}
+
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    const opus_int                  head_room,                              /* I    Desired headroom                                                            */
+    opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */
+    opus_int                        *rshifts                                /* I/O  Right shifts of correlations                                                */
+)
+{
+    opus_int         i, j, lag, rshifts_local, head_room_rshifts;
+    opus_int32       energy;
+    const opus_int16 *ptr1, *ptr2;
+
+    /* Calculate energy to find shift used to fit in 32 bits */
+    silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 );
+    /* Add shifts to get the desired head room */
+    head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 );
+
+    energy = silk_RSHIFT32( energy, head_room_rshifts );
+    rshifts_local += head_room_rshifts;
+
+    /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */
+    /* Remove contribution of first order - 1 samples */
+    for( i = 0; i < order - 1; i++ ) {
+        energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local );
+    }
+    if( rshifts_local < *rshifts ) {
+        /* Adjust energy */
+        energy = silk_RSHIFT32( energy, *rshifts - rshifts_local );
+        rshifts_local = *rshifts;
+    }
+
+    /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */
+    /* Fill out the diagonal of the correlation matrix */
+    matrix_ptr( XX, 0, 0, order ) = energy;
+    ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */
+    for( j = 1; j < order; j++ ) {
+        energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) );
+        energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) );
+        matrix_ptr( XX, j, j, order ) = energy;
+    }
+
+    ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */
+    /* Calculate the remaining elements of the correlation matrix */
+    if( rshifts_local > 0 ) {
+        /* Right shifting used */
+        for( lag = 1; lag < order; lag++ ) {
+            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
+            energy = 0;
+            for( i = 0; i < L; i++ ) {
+                energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local );
+            }
+            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
+            matrix_ptr( XX, lag, 0, order ) = energy;
+            matrix_ptr( XX, 0, lag, order ) = energy;
+            for( j = 1; j < ( order - lag ); j++ ) {
+                energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) );
+                energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) );
+                matrix_ptr( XX, lag + j, j, order ) = energy;
+                matrix_ptr( XX, j, lag + j, order ) = energy;
+            }
+            ptr2--; /* Update pointer to first sample of next column (lag) in X */
+        }
+    } else {
+        for( lag = 1; lag < order; lag++ ) {
+            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
+            energy = silk_inner_prod_aligned( ptr1, ptr2, L );
+            matrix_ptr( XX, lag, 0, order ) = energy;
+            matrix_ptr( XX, 0, lag, order ) = energy;
+            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
+            for( j = 1; j < ( order - lag ); j++ ) {
+                energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) );
+                energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] );
+                matrix_ptr( XX, lag + j, j, order ) = energy;
+                matrix_ptr( XX, j, lag + j, order ) = energy;
+            }
+            ptr2--;/* Update pointer to first sample of next column (lag) in X */
+        }
+    }
+    *rshifts = rshifts_local;
+}
+
diff --git a/drivers/opus/silk/fixed/encode_frame_FIX.c b/drivers/opus/silk/fixed/encode_frame_FIX.c
new file mode 100644
index 0000000000..2d80ca3583
--- /dev/null
+++ b/drivers/opus/silk/fixed/encode_frame_FIX.c
@@ -0,0 +1,385 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
+static OPUS_INLINE void silk_LBRR_encode_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
+    const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
+    opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
+);
+
+void silk_encode_do_VAD_FIX(
+    silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+)
+{
+    /****************************/
+    /* Voice Activity Detection */
+    /****************************/
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+
+    /**************************************************/
+    /* Convert speech activity into VAD and DTX flags */
+    /**************************************************/
+    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+        psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
+        psEnc->sCmn.noSpeechCounter++;
+        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.inDTX = 0;
+        } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
+            psEnc->sCmn.inDTX           = 0;
+        }
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
+    } else {
+        psEnc->sCmn.noSpeechCounter    = 0;
+        psEnc->sCmn.inDTX              = 0;
+        psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+    }
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    opus_int32                      *pnBytesOut,                            /* O    Pointer to number of payload bytes;                                         */
+    ec_enc                          *psRangeEnc,                            /* I/O  compressor data structure                                                   */
+    opus_int                        condCoding,                             /* I    The type of conditional coding to use                                       */
+    opus_int                        maxBits,                                /* I    If > 0: maximum number of output bits                                       */
+    opus_int                        useCBR                                  /* I    Flag to force constant-bitrate operation                                    */
+)
+{
+    silk_encoder_control_FIX sEncCtrl;
+    opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
+    opus_int16   *x_frame;
+    ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
+    silk_nsq_state sNSQ_copy, sNSQ_copy2;
+    opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
+    opus_int32   gainsID, gainsID_lower, gainsID_upper;
+    opus_int16   gainMult_Q8;
+    opus_int16   ec_prevLagIndex_copy;
+    opus_int     ec_prevSignalType_copy;
+    opus_int8    LastGainIndex_copy2;
+    SAVE_STACK;
+
+    /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
+    LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
+
+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+    /**************************************************************/
+    /* Set up Input Pointers, and insert frame in input buffer   */
+    /*************************************************************/
+    /* start of frame to encode */
+    x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;
+
+    /***************************************/
+    /* Ensure smooth bandwidth transitions */
+    /***************************************/
+    silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+    /*******************************************/
+    /* Copy new frame to front of input buffer */
+    /*******************************************/
+    silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) );
+
+    if( !psEnc->sCmn.prefillFlag ) {
+        VARDECL( opus_int32, xfw_Q3 );
+        VARDECL( opus_int16, res_pitch );
+        VARDECL( opus_uint8, ec_buf_copy );
+        opus_int16 *res_pitch_frame;
+
+        ALLOC( res_pitch,
+               psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length
+                   + psEnc->sCmn.ltp_mem_length, opus_int16 );
+        /* start of pitch LPC residual frame */
+        res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length;
+
+        /*****************************************/
+        /* Find pitch lags, initial LPC analysis */
+        /*****************************************/
+        silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
+
+        /************************/
+        /* Noise shape analysis */
+        /************************/
+        silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch );
+
+        /***************************************************/
+        /* Find linear prediction coefficients (LPC + LTP) */
+        /***************************************************/
+        silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
+
+        /****************************************/
+        /* Process gains                        */
+        /****************************************/
+        silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding );
+
+        /*****************************************/
+        /* Prefiltering for noise shaper         */
+        /*****************************************/
+        ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 );
+        silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );
+
+        /****************************************/
+        /* Low Bitrate Redundant Encoding       */
+        /****************************************/
+        silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding );
+
+        /* Loop over quantizer and entropy coding to control bitrate */
+        maxIter = 6;
+        gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
+        found_lower = 0;
+        found_upper = 0;
+        gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+        gainsID_lower = -1;
+        gainsID_upper = -1;
+        /* Copy part of the input state */
+        silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
+        silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        seed_copy = psEnc->sCmn.indices.Seed;
+        ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
+        ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
+        ALLOC( ec_buf_copy, 1275, opus_uint8 );
+        for( iter = 0; ; iter++ ) {
+            if( gainsID == gainsID_lower ) {
+                nBits = nBits_lower;
+            } else if( gainsID == gainsID_upper ) {
+                nBits = nBits_upper;
+            } else {
+                /* Restore part of the input state */
+                if( iter > 0 ) {
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
+                    psEnc->sCmn.indices.Seed = seed_copy;
+                    psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
+                    psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
+                }
+
+                /*****************************************/
+                /* Noise shaping quantization            */
+                /*****************************************/
+                if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
+                    silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
+                           sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
+                           sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+                } else {
+                    silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
+                            sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
+                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
+                }
+
+                /****************************************/
+                /* Encode Parameters                    */
+                /****************************************/
+                silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
+
+                /****************************************/
+                /* Encode Excitation Signal             */
+                /****************************************/
+                silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
+                    psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
+
+                nBits = ec_tell( psRangeEnc );
+
+                if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
+                    break;
+                }
+            }
+
+            if( iter == maxIter ) {
+                if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
+                    /* Restore output state from earlier iteration that did meet the bitrate budget */
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
+                    silk_assert( sRangeEnc_copy2.offs <= 1275 );
+                    silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
+                    psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
+                }
+                break;
+            }
+
+            if( nBits > maxBits ) {
+                if( found_lower == 0 && iter >= 2 ) {
+                    /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
+                    sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 );
+                    found_upper = 0;
+                    gainsID_upper = -1;
+                } else {
+                    found_upper = 1;
+                    nBits_upper = nBits;
+                    gainMult_upper = gainMult_Q8;
+                    gainsID_upper = gainsID;
+                }
+            } else if( nBits < maxBits - 5 ) {
+                found_lower = 1;
+                nBits_lower = nBits;
+                gainMult_lower = gainMult_Q8;
+                if( gainsID != gainsID_lower ) {
+                    gainsID_lower = gainsID;
+                    /* Copy part of the output state */
+                    silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
+                    silk_assert( psRangeEnc->offs <= 1275 );
+                    silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
+                    silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+                    LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
+                }
+            } else {
+                /* Within 5 bits of budget: close enough */
+                break;
+            }
+
+            if( ( found_lower & found_upper ) == 0 ) {
+                /* Adjust gain according to high-rate rate/distortion curve */
+                opus_int32 gain_factor_Q16;
+                gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
+                gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
+                if( nBits > maxBits ) {
+                    gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
+                }
+                gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+            } else {
+                /* Adjust gain by interpolating */
+                gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
+                /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
+                if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
+                } else
+                if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
+                }
+            }
+
+            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+                sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
+            }
+ 
+            /* Quantize gains */
+            psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
+            silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
+                  &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+            /* Unique identifier of gains vector */
+            gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+        }
+    }
+
+    /* Update input buffer */
+    silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
+        ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) );
+
+    /* Exit without entropy coding */
+    if( psEnc->sCmn.prefillFlag ) {
+        /* No payload */
+        *pnBytesOut = 0;
+        RESTORE_STACK;
+        return ret;
+    }
+
+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
+    /****************************************/
+    /* Finalize payload                     */
+    /****************************************/
+    psEnc->sCmn.first_frame_after_reset = 0;
+    /* Payload size */
+    *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
+
+    RESTORE_STACK;
+    return ret;
+}
+
+/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
+static OPUS_INLINE void silk_LBRR_encode_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
+    const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
+    opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
+)
+{
+    opus_int32   TempGains_Q16[ MAX_NB_SUBFR ];
+    SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
+    silk_nsq_state sNSQ_LBRR;
+
+    /*******************************************/
+    /* Control use of inband LBRR              */
+    /*******************************************/
+    if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
+        psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+
+        /* Copy noise shaping quantizer state and quantization indices from regular encoding */
+        silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
+
+        /* Save original gains */
+        silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+
+        if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
+            /* First frame in packet or previous frame not LBRR coded */
+            psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
+
+            /* Increase Gains to get target LBRR rate */
+            psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases;
+            psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
+        }
+
+        /* Decode to get gains in sync with decoder         */
+        /* Overwrite unquantized gains with quantized gains */
+        silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices,
+            &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+        /*****************************************/
+        /* Noise shaping quantization            */
+        /*****************************************/
+        if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
+            silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
+                psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
+                psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
+                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+        } else {
+            silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
+                psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
+                psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
+                psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
+        }
+
+        /* Restore original gains */
+        silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+    }
+}
diff --git a/drivers/opus/silk/fixed/find_LPC_FIX.c b/drivers/opus/silk/fixed/find_LPC_FIX.c
new file mode 100644
index 0000000000..a46cdb7515
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_LPC_FIX.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Finds LPC vector from correlations, and converts to NLSF */
+void silk_find_LPC_FIX(
+    silk_encoder_state              *psEncC,                                /* I/O  Encoder state                                                               */
+    opus_int16                      NLSF_Q15[],                             /* O    NLSFs                                                                       */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+    const opus_int32                minInvGain_Q30                          /* I    Inverse of max prediction gain                                              */
+)
+{
+    opus_int     k, subfr_length;
+    opus_int32   a_Q16[ MAX_LPC_ORDER ];
+    opus_int     isInterpLower, shift;
+    opus_int32   res_nrg0, res_nrg1;
+    opus_int     rshift0, rshift1;
+
+    /* Used only for LSF interpolation */
+    opus_int32   a_tmp_Q16[ MAX_LPC_ORDER ], res_nrg_interp, res_nrg, res_tmp_nrg;
+    opus_int     res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q;
+    opus_int16   a_tmp_Q12[ MAX_LPC_ORDER ];
+    opus_int16   NLSF0_Q15[ MAX_LPC_ORDER ];
+    SAVE_STACK;
+
+    subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder;
+
+    /* Default: no interpolation */
+    psEncC->indices.NLSFInterpCoef_Q2 = 4;
+
+    /* Burg AR analysis for the full frame */
+    silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch );
+
+    if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
+        VARDECL( opus_int16, LPC_res );
+
+        /* Optimal solution for last 10 ms */
+        silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch );
+
+        /* subtract residual energy here, as that's easier than adding it to the    */
+        /* residual energy of the first 10 ms in each iteration of the search below */
+        shift = res_tmp_nrg_Q - res_nrg_Q;
+        if( shift >= 0 ) {
+            if( shift < 32 ) {
+                res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift );
+            }
+        } else {
+            silk_assert( shift > -32 );
+            res_nrg   = silk_RSHIFT( res_nrg, -shift ) - res_tmp_nrg;
+            res_nrg_Q = res_tmp_nrg_Q;
+        }
+
+        /* Convert to NLSFs */
+        silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder );
+
+        ALLOC( LPC_res, 2 * subfr_length, opus_int16 );
+
+        /* Search over interpolation indices to find the one with lowest residual energy */
+        for( k = 3; k >= 0; k-- ) {
+            /* Interpolate NLSFs for first half */
+            silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder );
+
+            /* Convert to LPC for residual energy evaluation */
+            silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder );
+
+            /* Calculate residual energy with NLSF interpolation */
+            silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder );
+
+            silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder,                subfr_length - psEncC->predictLPCOrder );
+            silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder );
+
+            /* Add subframe energies from first half frame */
+            shift = rshift0 - rshift1;
+            if( shift >= 0 ) {
+                res_nrg1         = silk_RSHIFT( res_nrg1, shift );
+                res_nrg_interp_Q = -rshift0;
+            } else {
+                res_nrg0         = silk_RSHIFT( res_nrg0, -shift );
+                res_nrg_interp_Q = -rshift1;
+            }
+            res_nrg_interp = silk_ADD32( res_nrg0, res_nrg1 );
+
+            /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */
+            shift = res_nrg_interp_Q - res_nrg_Q;
+            if( shift >= 0 ) {
+                if( silk_RSHIFT( res_nrg_interp, shift ) < res_nrg ) {
+                    isInterpLower = silk_TRUE;
+                } else {
+                    isInterpLower = silk_FALSE;
+                }
+            } else {
+                if( -shift < 32 ) {
+                    if( res_nrg_interp < silk_RSHIFT( res_nrg, -shift ) ) {
+                        isInterpLower = silk_TRUE;
+                    } else {
+                        isInterpLower = silk_FALSE;
+                    }
+                } else {
+                    isInterpLower = silk_FALSE;
+                }
+            }
+
+            /* Determine whether current interpolated NLSFs are best so far */
+            if( isInterpLower == silk_TRUE ) {
+                /* Interpolation has lower residual energy */
+                res_nrg   = res_nrg_interp;
+                res_nrg_Q = res_nrg_interp_Q;
+                psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k;
+            }
+        }
+    }
+
+    if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
+        /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
+        silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder );
+    }
+
+    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/find_LTP_FIX.c b/drivers/opus/silk/fixed/find_LTP_FIX.c
new file mode 100644
index 0000000000..a1d152eee4
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_LTP_FIX.c
@@ -0,0 +1,244 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "tuning_parameters.h"
+
+/* Head room for correlations */
+#define LTP_CORRS_HEAD_ROOM                             2
+
+void silk_fit_LTP(
+    opus_int32 LTP_coefs_Q16[ LTP_ORDER ],
+    opus_int16 LTP_coefs_Q14[ LTP_ORDER ]
+);
+
+void silk_find_LTP_FIX(
+    opus_int16                      b_Q14[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                                                   */
+    opus_int32                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization                                           */
+    opus_int                        *LTPredCodGain_Q7,                      /* O    LTP coding gain                                                             */
+    const opus_int16                r_lpc[],                                /* I    residual signal after LPC signal + state for first 10 ms                    */
+    const opus_int                  lag[ MAX_NB_SUBFR ],                    /* I    LTP lags                                                                    */
+    const opus_int32                Wght_Q15[ MAX_NB_SUBFR ],               /* I    weights                                                                     */
+    const opus_int                  subfr_length,                           /* I    subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */
+    const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */
+    opus_int                        corr_rshifts[ MAX_NB_SUBFR ]            /* O    right shifts applied to correlations                                        */
+)
+{
+    opus_int   i, k, lshift;
+    const opus_int16 *r_ptr, *lag_ptr;
+    opus_int16 *b_Q14_ptr;
+
+    opus_int32 regu;
+    opus_int32 *WLTP_ptr;
+    opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26;
+    opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits;
+
+    opus_int32 temp32, denom32;
+    opus_int   extra_shifts;
+    opus_int   rr_shifts, maxRshifts, maxRshifts_wxtra, LZs;
+    opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16;
+    opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];
+    opus_int32 wd, m_Q12;
+
+    b_Q14_ptr = b_Q14;
+    WLTP_ptr  = WLTP;
+    r_ptr     = &r_lpc[ mem_offset ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );
+
+        silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */
+
+        /* Assure headroom */
+        LZs = silk_CLZ32( rr[k] );
+        if( LZs < LTP_CORRS_HEAD_ROOM ) {
+            rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs );
+            rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );
+        }
+        corr_rshifts[ k ] = rr_shifts;
+        silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] );  /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */
+
+        /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */
+        silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] );  /* Rr_fix_ptr   in Q( -corr_rshifts[ k ] ) */
+        if( corr_rshifts[ k ] > rr_shifts ) {
+            rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */
+        }
+        silk_assert( rr[ k ] >= 0 );
+
+        regu = 1;
+        regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
+        silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER );
+
+        silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */
+
+        /* Limit and store in Q14 */
+        silk_fit_LTP( b_Q16, b_Q14_ptr );
+
+        /* Calculate residual energy */
+        nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */
+
+        /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */
+        extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM );
+        denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */
+            silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts );    /* Q( -corr_rshifts[ k ] + extra_shifts ) */
+        denom32 = silk_max( denom32, 1 );
+        silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX );                       /* Wght always < 0.5 in Q0 */
+        temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 );             /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */
+        temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 );               /* Q26 */
+
+        /* Limit temp such that the below scaling never wraps around */
+        WLTP_max = 0;
+        for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
+            WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max );
+        }
+        lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */
+        silk_assert( 26 - 18 + lshift >= 0 );
+        if( 26 - 18 + lshift < 31 ) {
+            temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) );
+        }
+
+        silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */
+
+        w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */
+        silk_assert( w[k] >= 0 );
+
+        r_ptr     += subfr_length;
+        b_Q14_ptr += LTP_ORDER;
+        WLTP_ptr  += LTP_ORDER * LTP_ORDER;
+    }
+
+    maxRshifts = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts );
+    }
+
+    /* Compute LTP coding gain */
+    if( LTPredCodGain_Q7 != NULL ) {
+        LPC_LTP_res_nrg = 0;
+        LPC_res_nrg     = 0;
+        silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */
+        for( k = 0; k < nb_subfr; k++ ) {
+            LPC_res_nrg     = silk_ADD32( LPC_res_nrg,     silk_RSHIFT( silk_ADD32( silk_SMULWB(  rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
+            LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */
+        }
+        LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */
+
+        div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 );
+        *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) );
+
+        silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) );
+    }
+
+    /* smoothing */
+    /* d = sum( B, 1 ); */
+    b_Q14_ptr = b_Q14;
+    for( k = 0; k < nb_subfr; k++ ) {
+        d_Q14[ k ] = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            d_Q14[ k ] += b_Q14_ptr[ i ];
+        }
+        b_Q14_ptr += LTP_ORDER;
+    }
+
+    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */
+
+    /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */
+    max_abs_d_Q14 = 0;
+    max_w_bits    = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) );
+        /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */
+        /* Find bits needed in Q( 18 - maxRshifts ) */
+        max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts );
+    }
+
+    /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */
+    silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) );
+
+    /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */
+    extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14;
+
+    /* Subtract what we got available; bits in output var plus maxRshifts */
+    extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */
+    extra_shifts = silk_max_int( extra_shifts, 0 );
+
+    maxRshifts_wxtra = maxRshifts + extra_shifts;
+
+    temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */
+    wd = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        /* w has at least 2 bits of headroom so no overflow should happen */
+        temp32 = silk_ADD32( temp32,                     silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) );                      /* Q( 18 - maxRshifts_wxtra ) */
+        wd     = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */
+    }
+    m_Q12 = silk_DIV32_varQ( wd, temp32, 12 );
+
+    b_Q14_ptr = b_Q14;
+    for( k = 0; k < nb_subfr; k++ ) {
+        /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */
+        if( 2 - corr_rshifts[k] > 0 ) {
+            temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] );
+        } else {
+            temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 );
+        }
+
+        g_Q26 = silk_MUL(
+            silk_DIV32(
+                SILK_FIX_CONST( LTP_SMOOTHING, 26 ),
+                silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ),                          /* Q10 */
+            silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) );    /* Q16 */
+
+        temp32 = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 );     /* 1638_Q14 = 0.1_Q0 */
+            temp32 += delta_b_Q14[ i ];                                 /* Q14 */
+        }
+        temp32 = silk_DIV32( g_Q26, temp32 );                           /* Q14 -> Q12 */
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 );
+        }
+        b_Q14_ptr += LTP_ORDER;
+    }
+}
+
+void silk_fit_LTP(
+    opus_int32 LTP_coefs_Q16[ LTP_ORDER ],
+    opus_int16 LTP_coefs_Q14[ LTP_ORDER ]
+)
+{
+    opus_int i;
+
+    for( i = 0; i < LTP_ORDER; i++ ) {
+        LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) );
+    }
+}
diff --git a/drivers/opus/silk/fixed/find_pitch_lags_FIX.c b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
new file mode 100644
index 0000000000..0598477cd1
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
@@ -0,0 +1,145 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Find pitch lags */
+void silk_find_pitch_lags_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int16                      res[],                                  /* O    residual                                                                    */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
+)
+{
+    opus_int   buf_len, i, scale;
+    opus_int32 thrhld_Q13, res_nrg;
+    const opus_int16 *x_buf, *x_buf_ptr;
+    VARDECL( opus_int16, Wsig );
+    opus_int16 *Wsig_ptr;
+    opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
+    opus_int16 rc_Q15[    MAX_FIND_PITCH_LPC_ORDER ];
+    opus_int32 A_Q24[     MAX_FIND_PITCH_LPC_ORDER ];
+    opus_int16 A_Q12[     MAX_FIND_PITCH_LPC_ORDER ];
+    SAVE_STACK;
+
+    /******************************************/
+    /* Set up buffer lengths etc based on Fs  */
+    /******************************************/
+    buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length;
+
+    /* Safety check */
+    silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+
+    x_buf = x - psEnc->sCmn.ltp_mem_length;
+
+    /*************************************/
+    /* Estimate LPC AR coefficients      */
+    /*************************************/
+
+    /* Calculate windowed signal */
+
+    ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 );
+
+    /* First LA_LTP samples */
+    x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
+    Wsig_ptr  = Wsig;
+    silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch );
+
+    /* Middle un - windowed samples */
+    Wsig_ptr  += psEnc->sCmn.la_pitch;
+    x_buf_ptr += psEnc->sCmn.la_pitch;
+    silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) );
+
+    /* Last LA_LTP samples */
+    Wsig_ptr  += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
+    x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 );
+    silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
+
+    /* Calculate autocorrelation sequence */
+    silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );
+
+    /* Add white noise, as fraction of energy */
+    auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1;
+
+    /* Calculate the reflection coefficients using schur */
+    res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Prediction gain */
+    psEncCtrl->predGain_Q16 = silk_DIV32_varQ( auto_corr[ 0 ], silk_max_int( res_nrg, 1 ), 16 );
+
+    /* Convert reflection coefficients to prediction coefficients */
+    silk_k2a( A_Q24, rc_Q15, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Convert From 32 bit Q24 to 16 bit Q12 coefs */
+    for( i = 0; i < psEnc->sCmn.pitchEstimationLPCOrder; i++ ) {
+        A_Q12[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( A_Q24[ i ], 12 ) );
+    }
+
+    /* Do BWE */
+    silk_bwexpander( A_Q12, psEnc->sCmn.pitchEstimationLPCOrder, SILK_FIX_CONST( FIND_PITCH_BANDWIDTH_EXPANSION, 16 ) );
+
+    /*****************************************/
+    /* LPC analysis filtering                */
+    /*****************************************/
+    silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
+        /* Threshold for pitch estimator */
+        thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 );
+        thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder );
+        thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1,   21  ), psEnc->sCmn.speech_activity_Q8 );
+        thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15,  13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) );
+        thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1,   14 ), psEnc->sCmn.input_tilt_Q15 );
+        thrhld_Q13 = silk_SAT16(  thrhld_Q13 );
+
+        /*****************************************/
+        /* Call pitch estimator                  */
+        /*****************************************/
+        if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,
+                &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,
+                (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr,
+                psEnc->sCmn.arch) == 0 )
+        {
+            psEnc->sCmn.indices.signalType = TYPE_VOICED;
+        } else {
+            psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        }
+    } else {
+        silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
+        psEnc->sCmn.indices.lagIndex = 0;
+        psEnc->sCmn.indices.contourIndex = 0;
+        psEnc->LTPCorr_Q15 = 0;
+    }
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/find_pred_coefs_FIX.c b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
new file mode 100644
index 0000000000..0ab70df09d
--- /dev/null
+++ b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
@@ -0,0 +1,147 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+
+void silk_find_pred_coefs_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    const opus_int16                res_pitch[],                            /* I    Residual from pitch analysis                                                */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+)
+{
+    opus_int         i;
+    opus_int32       invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];
+    opus_int16       NLSF_Q15[ MAX_LPC_ORDER ];
+    const opus_int16 *x_ptr;
+    opus_int16       *x_pre_ptr;
+    VARDECL( opus_int16, LPC_in_pre );
+    opus_int32       tmp, min_gain_Q16, minInvGain_Q30;
+    opus_int         LTP_corrs_rshift[ MAX_NB_SUBFR ];
+    SAVE_STACK;
+
+    /* weighting for weighted least squares */
+    min_gain_Q16 = silk_int32_MAX >> 6;
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        min_gain_Q16 = silk_min( min_gain_Q16, psEncCtrl->Gains_Q16[ i ] );
+    }
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        /* Divide to Q16 */
+        silk_assert( psEncCtrl->Gains_Q16[ i ] > 0 );
+        /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
+        invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 );
+
+        /* Ensure Wght_Q15 a minimum value 1 */
+        invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 );
+
+        /* Square the inverted gains */
+        silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) );
+        tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] );
+        Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 );
+
+        /* Invert the inverted and normalized gains */
+        local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] );
+    }
+
+    ALLOC( LPC_in_pre,
+           psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder
+               + psEnc->sCmn.frame_length, opus_int16 );
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        VARDECL( opus_int32, WLTP );
+
+        /**********/
+        /* VOICED */
+        /**********/
+        silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+
+        ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 );
+
+        /* LTP analysis */
+        silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
+            res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length,
+            psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );
+
+        /* Quantize LTP gain parameters */
+        silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);
+
+        /* Control LTP scaling */
+        silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding );
+
+        /* Create LTP residual */
+        silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14,
+            psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+    } else {
+        /************/
+        /* UNVOICED */
+        /************/
+        /* Create signal with prepended subframes, scaled by inverse gains */
+        x_ptr     = x - psEnc->sCmn.predictLPCOrder;
+        x_pre_ptr = LPC_in_pre;
+        for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+            silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ],
+                psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
+            x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
+            x_ptr     += psEnc->sCmn.subfr_length;
+        }
+
+        silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) );
+        psEncCtrl->LTPredCodGain_Q7 = 0;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
+    }
+
+    /* Limit on total predictive coding gain */
+    if( psEnc->sCmn.first_frame_after_reset ) {
+        minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 );
+    } else {        
+        minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) );      /* Q16 */
+        minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, 
+            silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), 
+                silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 );
+    }
+
+    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
+    silk_find_LPC_FIX( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain_Q30 );
+
+    /* Quantize LSFs */
+    silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
+
+    /* Calculate residual energy using quantized LPC coefficients */
+    silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains,
+        psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+    /* Copy to prediction struct for use in next frame for interpolation */
+    silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/k2a_FIX.c b/drivers/opus/silk/fixed/k2a_FIX.c
new file mode 100644
index 0000000000..848666ee3b
--- /dev/null
+++ b/drivers/opus/silk/fixed/k2a_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int16            *rc_Q15,            /* I    Reflection coefficients [order] Q15                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k, n;
+    opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
+
+    for( k = 0; k < order; k++ ) {
+        for( n = 0; n < k; n++ ) {
+            Atmp[ n ] = A_Q24[ n ];
+        }
+        for( n = 0; n < k; n++ ) {
+            A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] );
+        }
+        A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 );
+    }
+}
diff --git a/drivers/opus/silk/fixed/k2a_Q16_FIX.c b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
new file mode 100644
index 0000000000..f7e62e95fe
--- /dev/null
+++ b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a_Q16(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int32            *rc_Q16,            /* I    Reflection coefficients [order] Q16                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k, n;
+    opus_int32 Atmp[ SILK_MAX_ORDER_LPC ];
+
+    for( k = 0; k < order; k++ ) {
+        for( n = 0; n < k; n++ ) {
+            Atmp[ n ] = A_Q24[ n ];
+        }
+        for( n = 0; n < k; n++ ) {
+            A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] );
+        }
+        A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 );
+    }
+}
diff --git a/drivers/opus/silk/fixed/main_FIX.h b/drivers/opus/silk/fixed/main_FIX.h
new file mode 100644
index 0000000000..fb47ffe700
--- /dev/null
+++ b/drivers/opus/silk/fixed/main_FIX.h
@@ -0,0 +1,257 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_FIX_H
+#define SILK_MAIN_FIX_H
+
+#include "SigProc_FIX.h"
+#include "structs_FIX.h"
+#include "control.h"
+#include "silk_main.h"
+#include "PLC.h"
+#include "debug.h"
+#include "entenc.h"
+
+#ifndef FORCE_CPP_BUILD
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#endif
+
+#define silk_encoder_state_Fxx      silk_encoder_state_FIX
+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FIX
+#define silk_encode_frame_Fxx       silk_encode_frame_FIX
+
+/*********************/
+/* Encoder Functions */
+/*********************/
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+    silk_encoder_state_Fxx          state_Fxx[]                             /* I/O  Encoder states                                                              */
+);
+
+/* Encoder main function */
+void silk_encode_do_VAD_FIX(
+    silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
+);
+
+/* Encoder main function */
+opus_int silk_encode_frame_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    opus_int32                      *pnBytesOut,                            /* O    Pointer to number of payload bytes;                                         */
+    ec_enc                          *psRangeEnc,                            /* I/O  compressor data structure                                                   */
+    opus_int                        condCoding,                             /* I    The type of conditional coding to use                                       */
+    opus_int                        maxBits,                                /* I    If > 0: maximum number of output bits                                       */
+    opus_int                        useCBR                                  /* I    Flag to force constant-bitrate operation                                    */
+);
+
+/* Initializes the Silk encoder state */
+opus_int silk_init_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+);
+
+/* Control the Silk encoder */
+opus_int silk_control_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk encoder state                                               */
+    silk_EncControlStruct           *encControl,                            /* I    Control structure                                                           */
+    const opus_int32                TargetRate_bps,                         /* I    Target max bitrate (bps)                                                    */
+    const opus_int                  allow_bw_switch,                        /* I    Flag to allow switching audio bandwidth                                     */
+    const opus_int                  channelNb,                              /* I    Channel number                                                              */
+    const opus_int                  force_fs_kHz
+);
+
+/****************/
+/* Prefiltering */
+/****************/
+void silk_prefilter_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    const silk_encoder_control_FIX  *psEncCtrl,                             /* I    Encoder control                                                             */
+    opus_int32                      xw_Q10[],                               /* O    Weighted signal                                                             */
+    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+);
+
+/**************************/
+/* Noise shaping analysis */
+/**************************/
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
+    const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+);
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FIX(
+          opus_int32                *corr,                                  /* O    Result [order + 1]                                                          */
+          opus_int                  *scale,                                 /* O    Scaling of the correlation vector                                           */
+    const opus_int16                *input,                                 /* I    Input data to correlate                                                     */
+    const opus_int                  warping_Q16,                            /* I    Warping coefficient                                                         */
+    const opus_int                  length,                                 /* I    Length of input                                                             */
+    const opus_int                  order                                   /* I    Correlation order (even)                                                    */
+);
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+);
+
+/**********************************************/
+/* Prediction Analysis                        */
+/**********************************************/
+/* Find pitch lags */
+void silk_find_pitch_lags_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    opus_int16                      res[],                                  /* O    residual                                                                    */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    int                             arch                                    /* I    Run-time architecture                                                       */
+);
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  encoder control                                                             */
+    const opus_int16                res_pitch[],                            /* I    Residual from pitch analysis                                                */
+    const opus_int16                x[],                                    /* I    Speech signal                                                               */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+);
+
+/* LPC analysis */
+void silk_find_LPC_FIX(
+    silk_encoder_state              *psEncC,                                /* I/O  Encoder state                                                               */
+    opus_int16                      NLSF_Q15[],                             /* O    NLSFs                                                                       */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+    const opus_int32                minInvGain_Q30                          /* I    Inverse of max prediction gain                                              */
+);
+
+/* LTP analysis */
+void silk_find_LTP_FIX(
+    opus_int16                      b_Q14[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                                                   */
+    opus_int32                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization                                           */
+    opus_int                        *LTPredCodGain_Q7,                      /* O    LTP coding gain                                                             */
+    const opus_int16                r_lpc[],                                /* I    residual signal after LPC signal + state for first 10 ms                    */
+    const opus_int                  lag[ MAX_NB_SUBFR ],                    /* I    LTP lags                                                                    */
+    const opus_int32                Wght_Q15[ MAX_NB_SUBFR ],               /* I    weights                                                                     */
+    const opus_int                  subfr_length,                           /* I    subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    number of subframes                                                         */
+    const opus_int                  mem_offset,                             /* I    number of samples in LTP memory                                             */
+    opus_int                        corr_rshifts[ MAX_NB_SUBFR ]            /* O    right shifts applied to correlations                                        */
+);
+
+void silk_LTP_analysis_filter_FIX(
+    opus_int16                      *LTP_res,                               /* O    LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length )  */
+    const opus_int16                *x,                                     /* I    Pointer to input signal with at least max( pitchL ) preceding samples       */
+    const opus_int16                LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I    LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe                   */
+    const opus_int                  pitchL[ MAX_NB_SUBFR ],                 /* I    Pitch lag, one for each subframe                                            */
+    const opus_int32                invGains_Q16[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains, one for each subframe                           */
+    const opus_int                  subfr_length,                           /* I    Length of each subframe                                                     */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  pre_length                              /* I    Length of the preceding samples starting at &x[0] for each subframe         */
+);
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FIX(
+          opus_int32                nrgs[ MAX_NB_SUBFR ],                   /* O    Residual energy per subframe                                                */
+          opus_int                  nrgsQ[ MAX_NB_SUBFR ],                  /* O    Q value per subframe                                                        */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+          opus_int16                a_Q12[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                                                */
+    const opus_int32                gains[ MAX_NB_SUBFR ],                  /* I    Quantization gains                                                          */
+    const opus_int                  subfr_length,                           /* I    Subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  LPC_order                               /* I    LPC order                                                                   */
+);
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+opus_int32 silk_residual_energy16_covar_FIX(
+    const opus_int16                *c,                                     /* I    Prediction vector                                                           */
+    const opus_int32                *wXX,                                   /* I    Correlation matrix                                                          */
+    const opus_int32                *wXx,                                   /* I    Correlation vector                                                          */
+    opus_int32                      wxx,                                    /* I    Signal energy                                                               */
+    opus_int                        D,                                      /* I    Dimension                                                                   */
+    opus_int                        cQ                                      /* I    Q value for c vector 0 - 15                                                 */
+);
+
+/* Processing of gains */
+void silk_process_gains_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+);
+
+/******************/
+/* Linear Algebra */
+/******************/
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    const opus_int                  head_room,                              /* I    Desired headroom                                                            */
+    opus_int32                      *XX,                                    /* O    Pointer to X'*X correlation matrix [ order x order ]                        */
+    opus_int                        *rshifts                                /* I/O  Right shifts of correlations                                                */
+);
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FIX(
+    const opus_int16                *x,                                     /* I    x vector [L + order - 1] used to form data matrix X                         */
+    const opus_int16                *t,                                     /* I    Target vector [L]                                                           */
+    const opus_int                  L,                                      /* I    Length of vectors                                                           */
+    const opus_int                  order,                                  /* I    Max lag for correlation                                                     */
+    opus_int32                      *Xt,                                    /* O    Pointer to X'*t correlation vector [order]                                  */
+    const opus_int                  rshifts                                 /* I    Right shifts of correlations                                                */
+);
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FIX(
+    opus_int32                      *XX,                                    /* I/O  Correlation matrices                                                        */
+    opus_int32                      *xx,                                    /* I/O  Correlation values                                                          */
+    opus_int32                      noise,                                  /* I    Noise to add                                                                */
+    opus_int                        D                                       /* I    Dimension of XX                                                             */
+);
+
+/* Solves Ax = b, assuming A is symmetric */
+void silk_solve_LDL_FIX(
+    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */
+    opus_int                        M,                                      /* I    Size of matrix                                                              */
+    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */
+    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */
+);
+
+#ifndef FORCE_CPP_BUILD
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* FORCE_CPP_BUILD */
+#endif /* SILK_MAIN_FIX_H */
diff --git a/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
new file mode 100644
index 0000000000..420cbeedfc
--- /dev/null
+++ b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
@@ -0,0 +1,445 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
+/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
+/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
+/* coefficient in an array of coefficients, for monic filters.                                    */
+static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/
+    const opus_int32     *coefs_Q24,
+    opus_int             lambda_Q16,
+    opus_int             order
+) {
+    opus_int   i;
+    opus_int32 gain_Q24;
+
+    lambda_Q16 = -lambda_Q16;
+    gain_Q24 = coefs_Q24[ order - 1 ];
+    for( i = order - 2; i >= 0; i-- ) {
+        gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 );
+    }
+    gain_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 );
+    return silk_INVERSE32_varQ( gain_Q24, 40 );
+}
+
+/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
+/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
+static OPUS_INLINE void limit_warped_coefs(
+    opus_int32           *coefs_syn_Q24,
+    opus_int32           *coefs_ana_Q24,
+    opus_int             lambda_Q16,
+    opus_int32           limit_Q24,
+    opus_int             order
+) {
+    opus_int   i, iter, ind = 0;
+    opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16;
+    opus_int32 nom_Q16, den_Q24;
+
+    /* Convert to monic coefficients */
+    lambda_Q16 = -lambda_Q16;
+    for( i = order - 1; i > 0; i-- ) {
+        coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+        coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+    }
+    lambda_Q16 = -lambda_Q16;
+    nom_Q16  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16,        lambda_Q16 );
+    den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
+    gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+    den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
+    gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+    for( i = 0; i < order; i++ ) {
+        coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+        coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+    }
+
+    for( iter = 0; iter < 10; iter++ ) {
+        /* Find maximum absolute value */
+        maxabs_Q24 = -1;
+        for( i = 0; i < order; i++ ) {
+            tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) );
+            if( tmp > maxabs_Q24 ) {
+                maxabs_Q24 = tmp;
+                ind = i;
+            }
+        }
+        if( maxabs_Q24 <= limit_Q24 ) {
+            /* Coefficients are within range - done */
+            return;
+        }
+
+        /* Convert back to true warped coefficients */
+        for( i = 1; i < order; i++ ) {
+            coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+            coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+        }
+        gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 );
+        gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 );
+        for( i = 0; i < order; i++ ) {
+            coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+            coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+        }
+
+        /* Apply bandwidth expansion */
+        chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ(
+            silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ),
+            silk_MUL( maxabs_Q24, ind + 1 ), 22 );
+        silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 );
+        silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 );
+
+        /* Convert to monic warped coefficients */
+        lambda_Q16 = -lambda_Q16;
+        for( i = order - 1; i > 0; i-- ) {
+            coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 );
+            coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 );
+        }
+        lambda_Q16 = -lambda_Q16;
+        nom_Q16  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16,        lambda_Q16 );
+        den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 );
+        gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+        den_Q24  = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 );
+        gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 );
+        for( i = 0; i < order; i++ ) {
+            coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] );
+            coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] );
+        }
+    }
+    silk_assert( 0 );
+}
+
+/**************************************************************/
+/* Compute noise shaping coefficients and initial gain values */
+/**************************************************************/
+void silk_noise_shape_analysis_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state FIX                                                           */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control FIX                                                         */
+    const opus_int16                *pitch_res,                             /* I    LPC residual from pitch analysis                                            */
+    const opus_int16                *x,                                     /* I    Input signal [ frame_length + la_shape ]                                    */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+)
+{
+    silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+    opus_int     k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
+    opus_int32   SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
+    opus_int32   nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
+    opus_int32   delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
+    opus_int32   auto_corr[     MAX_SHAPE_LPC_ORDER + 1 ];
+    opus_int32   refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32   AR1_Q24[       MAX_SHAPE_LPC_ORDER ];
+    opus_int32   AR2_Q24[       MAX_SHAPE_LPC_ORDER ];
+    VARDECL( opus_int16, x_windowed );
+    const opus_int16 *x_ptr, *pitch_res_ptr;
+    SAVE_STACK;
+
+    /* Point to start of first LPC analysis block */
+    x_ptr = x - psEnc->sCmn.la_shape;
+
+    /****************/
+    /* GAIN CONTROL */
+    /****************/
+    SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7;
+
+    /* Input quality is the average of the quality in the lowest two VAD bands */
+    psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ]
+        + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 );
+
+    /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
+    psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 -
+        SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 );
+
+    /* Reduce coding SNR during low speech activity */
+    if( psEnc->sCmn.useCBR == 0 ) {
+        b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8;
+        b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 );
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+            silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ),                                       /* Q11*/
+            silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) );     /* Q12*/
+    }
+
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce gains for periodic signals */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
+    } else {
+        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+            silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ),
+            SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
+    }
+
+    /*************************/
+    /* SPARSENESS PROCESSING */
+    /*************************/
+    /* Set quantizer offset */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Initially set to 0; may be overruled in process_gains(..) */
+        psEnc->sCmn.indices.quantOffsetType = 0;
+        psEncCtrl->sparseness_Q8 = 0;
+    } else {
+        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+        nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
+        energy_variation_Q7 = 0;
+        log_energy_prev_Q7  = 0;
+        pitch_res_ptr = pitch_res;
+        for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+            silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
+            nrg += silk_RSHIFT( nSamples, scale );           /* Q(-scale)*/
+
+            log_energy_Q7 = silk_lin2log( nrg );
+            if( k > 0 ) {
+                energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 );
+            }
+            log_energy_prev_Q7 = log_energy_Q7;
+            pitch_res_ptr += nSamples;
+        }
+
+        psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
+            SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
+
+        /* Set quantization offset depending on sparseness measure */
+        if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+
+        /* Increase coding SNR for sparse signals */
+        SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
+    }
+
+    /*******************************/
+    /* Control bandwidth expansion */
+    /*******************************/
+    /* More BWE for signals with high prediction gain */
+    strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
+    BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
+        silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
+    delta_Q16  = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
+        SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
+    BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
+    BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
+    /* BWExp1 will be applied after BWExp2, so make it relative */
+    BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
+
+    if( psEnc->sCmn.warping_Q16 > 0 ) {
+        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+        warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) );
+    } else {
+        warping_Q16 = 0;
+    }
+
+    /********************************************/
+    /* Compute noise shaping AR coefs and gains */
+    /********************************************/
+    ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Apply window: sine slope followed by flat part followed by cosine slope */
+        opus_int shift, slope_part, flat_part;
+        flat_part = psEnc->sCmn.fs_kHz * 3;
+        slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 );
+
+        silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part );
+        shift = slope_part;
+        silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) );
+        shift += flat_part;
+        silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+        /* Update pointer: next LPC analysis block */
+        x_ptr += psEnc->sCmn.subfr_length;
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Calculate warped auto correlation */
+            silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+        } else {
+            /* Calculate regular auto correlation */
+            silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
+        }
+
+        /* Add white noise, as a fraction of energy */
+        auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
+            SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
+
+        /* Calculate the reflection coefficients using schur */
+        nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
+        silk_assert( nrg >= 0 );
+
+        /* Convert reflection coefficients to prediction coefficients */
+        silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
+
+        Qnrg = -scale;          /* range: -12...30*/
+        silk_assert( Qnrg >= -12 );
+        silk_assert( Qnrg <=  30 );
+
+        /* Make sure that Qnrg is an even number */
+        if( Qnrg & 1 ) {
+            Qnrg -= 1;
+            nrg >>= 1;
+        }
+
+        tmp32 = silk_SQRT_APPROX( nrg );
+        Qnrg >>= 1;             /* range: -6...15*/
+
+        psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( tmp32, 16 - Qnrg );
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Adjust gain for warping */
+            gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
+            silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+            if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
+               psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
+            } else {
+               psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+            }
+        }
+
+        /* Bandwidth expansion for synthesis filter shaping */
+        silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
+
+        /* Compute noise shaping filter coefficients */
+        silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) );
+
+        /* Bandwidth expansion for analysis filter shaping */
+        silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
+        silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
+
+        /* Ratio of prediction gains, in energy domain */
+        pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
+        nrg         = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
+
+        /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
+        pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
+        psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
+
+        /* Convert to monic warped prediction coefficients and limit absolute values */
+        limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
+
+        /* Convert from Q24 to Q13 and store in int16 */
+        for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
+            psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
+            psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
+        }
+    }
+
+    /*****************/
+    /* Gain tweaking */
+    /*****************/
+    /* Increase gains during low speech activity and put lower limit on gains */
+    gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) );
+    gain_add_Q16  = silk_log2lin(  silk_SMLAWB(  SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) );
+    silk_assert( gain_mult_Q16 > 0 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+        silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+        psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
+    }
+
+    gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
+        psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
+    }
+
+    /************************************************/
+    /* Control low-frequency shaping and noise tilt */
+    /************************************************/
+    /* Less low frequency shaping for noisy inputs */
+    strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ),
+        SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) );
+    strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 );
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+        /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+        opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] );
+            /* Pack two coefficients in one int32 */
+            psEncCtrl->LF_shp_Q14[ k ]  = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 );
+            psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+        }
+        silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/
+        Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) -
+            silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ),
+                silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) );
+    } else {
+        b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/
+        /* Pack two coefficients in one int32 */
+        psEncCtrl->LF_shp_Q14[ 0 ]  = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 -
+            silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 );
+        psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+        for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ];
+        }
+        Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 );
+    }
+
+    /****************************/
+    /* HARMONIC SHAPING CONTROL */
+    /****************************/
+    /* Control boosting of harmonic frequencies */
+    HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
+        psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
+
+    /* More harmonic boost for noisy input signals */
+    HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
+        SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
+
+    if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* More harmonic noise shaping for high bitrates or noisy input */
+        HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
+                SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ),
+                psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) );
+
+        /* Less harmonic noise shaping for less periodic signals */
+        HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ),
+            silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) );
+    } else {
+        HarmShapeGain_Q16 = 0;
+    }
+
+    /*************************/
+    /* Smooth over subframes */
+    /*************************/
+    for( k = 0; k < MAX_NB_SUBFR; k++ ) {
+        psShapeSt->HarmBoost_smth_Q16 =
+            silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16,     HarmBoost_Q16     - psShapeSt->HarmBoost_smth_Q16,     SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+        psShapeSt->HarmShapeGain_smth_Q16 =
+            silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+        psShapeSt->Tilt_smth_Q16 =
+            silk_SMLAWB( psShapeSt->Tilt_smth_Q16,          Tilt_Q16          - psShapeSt->Tilt_smth_Q16,          SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+
+        psEncCtrl->HarmBoost_Q14[ k ]     = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16,     2 );
+        psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
+        psEncCtrl->Tilt_Q14[ k ]          = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16,          2 );
+    }
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
new file mode 100644
index 0000000000..4d65c09d1d
--- /dev/null
+++ b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
@@ -0,0 +1,744 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/***********************************************************
+* Pitch analyser function
+********************************************************** */
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+#include "stack_alloc.h"
+#include "debug.h"
+#include "pitch.h"
+
+#define SCRATCH_SIZE    22
+#define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
+#define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )
+#define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )
+#define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )
+#define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )
+#define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )
+#define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
+#define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
+#define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )
+#define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )
+#define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )
+
+typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
+
+/************************************************************/
+/* Internally used functions                                */
+/************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
+    const opus_int16  frame[],                         /* I vector to correlate         */
+    opus_int          start_lag,                       /* I lag offset to search around */
+    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
+    opus_int          nb_subfr,                        /* I number of subframes         */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
+);
+
+static void silk_P_Ana_calc_energy_st3(
+    silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
+    const opus_int16  frame[],                         /* I vector to calc energy in    */
+    opus_int          start_lag,                       /* I lag offset to search around */
+    opus_int          sf_length,                       /* I length of one 5 ms subframe */
+    opus_int          nb_subfr,                        /* I number of subframes         */
+    opus_int          complexity                       /* I Complexity setting          */
+);
+
+/*************************************************************/
+/*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
+/*************************************************************/
+opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
+    opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
+    opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
+    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
+    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
+)
+{
+    VARDECL( opus_int16, frame_8kHz );
+    VARDECL( opus_int16, frame_4kHz );
+    opus_int32 filt_state[ 6 ];
+    const opus_int16 *input_frame_ptr;
+    opus_int   i, k, d, j;
+    VARDECL( opus_int16, C );
+    VARDECL( opus_int32, xcorr32 );
+    const opus_int16 *target_ptr, *basis_ptr;
+    opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
+    opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
+    VARDECL( opus_int16, d_comp );
+    opus_int32 sum, threshold, lag_counter;
+    opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
+    opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
+    VARDECL( silk_pe_stage3_vals, energies_st3 );
+    VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
+    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
+    opus_int   sf_length;
+    opus_int   min_lag;
+    opus_int   max_lag;
+    opus_int32 contour_bias_Q15, diff;
+    opus_int   nb_cbk_search, cbk_size;
+    opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
+    const opus_int8 *Lag_CB_ptr;
+    SAVE_STACK;
+    /* Check for valid sampling frequency */
+    silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+
+    /* Check for valid complexity setting */
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
+    silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
+
+    /* Set up frame lengths max / min lag for the sampling frequency */
+    frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
+    frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
+    frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
+    sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
+    min_lag           = PE_MIN_LAG_MS * Fs_kHz;
+    max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
+
+    /* Resample from input sampled at Fs_kHz to 8 kHz */
+    ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
+    if( Fs_kHz == 16 ) {
+        silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+        silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
+    } else if( Fs_kHz == 12 ) {
+        silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
+        silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
+    } else {
+        silk_assert( Fs_kHz == 8 );
+        silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
+    }
+
+    /* Decimate again to 4 kHz */
+    silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
+    ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
+    silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
+
+    /* Low-pass filter */
+    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
+        frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
+    }
+
+    /*******************************************************************************
+    ** Scale 4 kHz signal down to prevent correlations measures from overflowing
+    ** find scaling as max scaling for each 8kHz(?) subframe
+    *******************************************************************************/
+
+    /* Inner product is calculated with different lengths, so scale for the worst case */
+    silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
+    if( shift > 0 ) {
+        shift = silk_RSHIFT( shift, 1 );
+        for( i = 0; i < frame_length_4kHz; i++ ) {
+            frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
+        }
+    }
+
+    /******************************************************************************
+    * FIRST STAGE, operating in 4 khz
+    ******************************************************************************/
+    ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
+    ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
+    silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
+    target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
+    for( k = 0; k < nb_subfr >> 1; k++ ) {
+        /* Check that we are within range of the array */
+        silk_assert( target_ptr >= frame_4kHz );
+        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+        basis_ptr = target_ptr - MIN_LAG_4KHZ;
+
+        /* Check that we are within range of the array */
+        silk_assert( basis_ptr >= frame_4kHz );
+        silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
+
+        /* Calculate first vector products before loop */
+        cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
+        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
+        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );
+        normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
+
+        matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
+            (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */
+
+        /* From now on normalizer is computed recursively */
+        for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
+            basis_ptr--;
+
+            /* Check that we are within range of the array */
+            silk_assert( basis_ptr >= frame_4kHz );
+            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+
+            cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
+
+            /* Add contribution of new sample and remove contribution from oldest sample */
+            normalizer = silk_ADD32( normalizer,
+                silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
+                silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
+
+            matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
+                (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */
+        }
+        /* Update target pointer */
+        target_ptr += SF_LENGTH_8KHZ;
+    }
+
+    /* Combine two subframes into single correlation measure and apply short-lag bias */
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+            sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
+                + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */
+            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
+            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
+        }
+    } else {
+        /* Only short-lag bias */
+        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
+            sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
+            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
+            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
+        }
+    }
+
+    /* Sort */
+    length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
+    silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+    silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
+                                          length_d_srch );
+
+    /* Escape if correlation is very low already here */
+    Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
+    if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
+        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+        *LTPCorr_Q15  = 0;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        RESTORE_STACK;
+        return 1;
+    }
+
+    threshold = silk_SMULWB( search_thres1_Q16, Cmax );
+    for( i = 0; i < length_d_srch; i++ ) {
+        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
+        if( C[ i ] > threshold ) {
+            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
+        } else {
+            length_d_srch = i;
+            break;
+        }
+    }
+    silk_assert( length_d_srch > 0 );
+
+    ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
+    for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
+        d_comp[ i - D_COMP_MIN ] = 0;
+    }
+    for( i = 0; i < length_d_srch; i++ ) {
+        d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
+    }
+
+    /* Convolution */
+    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+        d_comp[ i - D_COMP_MIN ] +=
+            d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
+    }
+
+    length_d_srch = 0;
+    for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
+        if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
+            d_srch[ length_d_srch ] = i;
+            length_d_srch++;
+        }
+    }
+
+    /* Convolution */
+    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
+        d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
+            + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
+    }
+
+    length_d_comp = 0;
+    for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
+        if( d_comp[ i - D_COMP_MIN ] > 0 ) {
+            d_comp[ length_d_comp ] = i - 2;
+            length_d_comp++;
+        }
+    }
+
+    /**********************************************************************************
+    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
+    *************************************************************************************/
+
+    /******************************************************************************
+    ** Scale signal down to avoid correlations measures from overflowing
+    *******************************************************************************/
+    /* find scaling as max scaling for each subframe */
+    silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
+    if( shift > 0 ) {
+        shift = silk_RSHIFT( shift, 1 );
+        for( i = 0; i < frame_length_8kHz; i++ ) {
+            frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
+        }
+    }
+
+    /*********************************************************************************
+    * Find energy of each subframe projected onto its history, for a range of delays
+    *********************************************************************************/
+    silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
+
+    target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
+    for( k = 0; k < nb_subfr; k++ ) {
+
+        /* Check that we are within range of the array */
+        silk_assert( target_ptr >= frame_8kHz );
+        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
+
+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
+        for( j = 0; j < length_d_comp; j++ ) {
+            d = d_comp[ j ];
+            basis_ptr = target_ptr - d;
+
+            /* Check that we are within range of the array */
+            silk_assert( basis_ptr >= frame_8kHz );
+            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
+
+            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+            if( cross_corr > 0 ) {
+                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
+                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
+                    (opus_int16)silk_DIV32_varQ( cross_corr,
+                                                 silk_ADD32( energy_target,
+                                                             energy_basis ),
+                                                 13 + 1 );                                      /* Q13 */
+            } else {
+                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
+            }
+        }
+        target_ptr += SF_LENGTH_8KHZ;
+    }
+
+    /* search over lag range and lags codebook */
+    /* scale factor for lag codebook, as a function of center lag */
+
+    CCmax   = silk_int32_MIN;
+    CCmax_b = silk_int32_MIN;
+
+    CBimax = 0; /* To avoid returning undefined lag values */
+    lag = -1;   /* To check if lag with strong enough correlation has been found */
+
+    if( prevLag > 0 ) {
+        if( Fs_kHz == 12 ) {
+            prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
+        } else if( Fs_kHz == 16 ) {
+            prevLag = silk_RSHIFT( prevLag, 1 );
+        }
+        prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
+    } else {
+        prevLag_log2_Q7 = 0;
+    }
+    silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
+    /* Set up stage 2 codebook based on number of subframes */
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        cbk_size   = PE_NB_CBKS_STAGE2_EXT;
+        Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+        if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
+            /* If input is 8 khz use a larger codebook here because it is last stage */
+            nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
+        } else {
+            nb_cbk_search = PE_NB_CBKS_STAGE2;
+        }
+    } else {
+        cbk_size       = PE_NB_CBKS_STAGE2_10MS;
+        Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+        nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
+    }
+
+    for( k = 0; k < length_d_srch; k++ ) {
+        d = d_srch[ k ];
+        for( j = 0; j < nb_cbk_search; j++ ) {
+            CC[ j ] = 0;
+            for( i = 0; i < nb_subfr; i++ ) {
+                opus_int d_subfr;
+                /* Try all codebooks */
+                d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
+                CC[ j ] = CC[ j ]
+                    + (opus_int32)matrix_ptr( C, i,
+                                              d_subfr - ( MIN_LAG_8KHZ - 2 ),
+                                              CSTRIDE_8KHZ );
+            }
+        }
+        /* Find best codebook */
+        CCmax_new = silk_int32_MIN;
+        CBimax_new = 0;
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            if( CC[ i ] > CCmax_new ) {
+                CCmax_new = CC[ i ];
+                CBimax_new = i;
+            }
+        }
+
+        /* Bias towards shorter lags */
+        lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
+        silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
+        silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
+        CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
+
+        /* Bias towards previous lag */
+        silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
+        if( prevLag > 0 ) {
+            delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
+            silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
+            delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
+            prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
+            prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
+            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
+        }
+
+        if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
+            CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */
+            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */
+         ) {
+            CCmax_b = CCmax_new_b;
+            CCmax   = CCmax_new;
+            lag     = d;
+            CBimax  = CBimax_new;
+        }
+    }
+
+    if( lag == -1 ) {
+        /* No suitable candidate found */
+        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+        *LTPCorr_Q15  = 0;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        RESTORE_STACK;
+        return 1;
+    }
+
+    /* Output normalized correlation */
+    *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
+    silk_assert( *LTPCorr_Q15 >= 0 );
+
+    if( Fs_kHz > 8 ) {
+        VARDECL( opus_int16, scratch_mem );
+        /***************************************************************************/
+        /* Scale input signal down to avoid correlations measures from overflowing */
+        /***************************************************************************/
+        /* find scaling as max scaling for each subframe */
+        silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
+        ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
+        if( shift > 0 ) {
+            /* Move signal to scratch mem because the input signal should be unchanged */
+            shift = silk_RSHIFT( shift, 1 );
+            for( i = 0; i < frame_length; i++ ) {
+                scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
+            }
+            input_frame_ptr = scratch_mem;
+        } else {
+            input_frame_ptr = frame;
+        }
+
+        /* Search in original signal */
+
+        CBimax_old = CBimax;
+        /* Compensate for decimation */
+        silk_assert( lag == silk_SAT16( lag ) );
+        if( Fs_kHz == 12 ) {
+            lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
+        } else if( Fs_kHz == 16 ) {
+            lag = silk_LSHIFT( lag, 1 );
+        } else {
+            lag = silk_SMULBB( lag, 3 );
+        }
+
+        lag = silk_LIMIT_int( lag, min_lag, max_lag );
+        start_lag = silk_max_int( lag - 2, min_lag );
+        end_lag   = silk_min_int( lag + 2, max_lag );
+        lag_new   = lag;                                    /* to avoid undefined lag */
+        CBimax    = 0;                                      /* to avoid undefined lag */
+
+        CCmax = silk_int32_MIN;
+        /* pitch lags according to second stage */
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
+        }
+
+        /* Set up codebook parameters according to complexity setting and frame length */
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
+            cbk_size        = PE_NB_CBKS_STAGE3_MAX;
+            Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        } else {
+            nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
+            cbk_size        = PE_NB_CBKS_STAGE3_10MS;
+            Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        }
+
+        /* Calculate the correlations and energies needed in stage 3 */
+        ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+        ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
+        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
+        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
+
+        lag_counter = 0;
+        silk_assert( lag == silk_SAT16( lag ) );
+        contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
+
+        target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
+        for( d = start_lag; d <= end_lag; d++ ) {
+            for( j = 0; j < nb_cbk_search; j++ ) {
+                cross_corr = 0;
+                energy     = energy_target;
+                for( k = 0; k < nb_subfr; k++ ) {
+                    cross_corr = silk_ADD32( cross_corr,
+                        matrix_ptr( cross_corr_st3, k, j,
+                                    nb_cbk_search )[ lag_counter ] );
+                    energy     = silk_ADD32( energy,
+                        matrix_ptr( energies_st3, k, j,
+                                    nb_cbk_search )[ lag_counter ] );
+                    silk_assert( energy >= 0 );
+                }
+                if( cross_corr > 0 ) {
+                    CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */
+                    /* Reduce depending on flatness of contour */
+                    diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */
+                    silk_assert( diff == silk_SAT16( diff ) );
+                    CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q14 */
+                } else {
+                    CCmax_new = 0;
+                }
+
+                if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
+                    CCmax   = CCmax_new;
+                    lag_new = d;
+                    CBimax  = j;
+                }
+            }
+            lag_counter++;
+        }
+
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
+        }
+        *lagIndex = (opus_int16)( lag_new - min_lag);
+        *contourIndex = (opus_int8)CBimax;
+    } else {        /* Fs_kHz == 8 */
+        /* Save Lags */
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
+        }
+        *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
+        *contourIndex = (opus_int8)CBimax;
+    }
+    silk_assert( *lagIndex >= 0 );
+    /* return as voiced */
+    RESTORE_STACK;
+    return 0;
+}
+
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correltions
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
+    const opus_int16  frame[],                         /* I vector to correlate         */
+    opus_int          start_lag,                       /* I lag offset to search around */
+    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
+    opus_int          nb_subfr,                        /* I number of subframes         */
+    opus_int          complexity,                      /* I Complexity setting          */
+    int               arch                             /* I Run-time architecture       */
+)
+{
+    const opus_int16 *target_ptr;
+    opus_int   i, j, k, lag_counter, lag_low, lag_high;
+    opus_int   nb_cbk_search, delta, idx, cbk_size;
+    VARDECL( opus_int32, scratch_mem );
+    VARDECL( opus_int32, xcorr32 );
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+    SAVE_STACK;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+    ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the correlations for each subframe */
+        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
+        for( j = lag_low; j <= lag_high; j++ ) {
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for */
+            /* each code_book vector for each start lag */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
+                    scratch_mem[ idx + j ];
+            }
+        }
+        target_ptr += sf_length;
+    }
+    RESTORE_STACK;
+}
+
+/********************************************************************/
+/* Calculate the energies for first two subframes. The energies are */
+/* calculated recursively.                                          */
+/********************************************************************/
+static void silk_P_Ana_calc_energy_st3(
+    silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */
+    const opus_int16  frame[],                          /* I vector to calc energy in    */
+    opus_int          start_lag,                        /* I lag offset to search around */
+    opus_int          sf_length,                        /* I length of one 5 ms subframe */
+    opus_int          nb_subfr,                         /* I number of subframes         */
+    opus_int          complexity                        /* I Complexity setting          */
+)
+{
+    const opus_int16 *target_ptr, *basis_ptr;
+    opus_int32 energy;
+    opus_int   k, i, j, lag_counter;
+    opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
+    VARDECL( opus_int32, scratch_mem );
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+    SAVE_STACK;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the energy for first lag */
+        basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
+        energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
+        silk_assert( energy >= 0 );
+        scratch_mem[ lag_counter ] = energy;
+        lag_counter++;
+
+        lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
+        for( i = 1; i < lag_diff; i++ ) {
+            /* remove part outside new window */
+            energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
+            silk_assert( energy >= 0 );
+
+            /* add part that comes into window */
+            energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
+            silk_assert( energy >= 0 );
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[ lag_counter ] = energy;
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for    */
+            /* each code_book vector for each start lag                     */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
+                    scratch_mem[ idx + j ];
+                silk_assert(
+                    matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
+            }
+        }
+        target_ptr += sf_length;
+    }
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/prefilter_FIX.c b/drivers/opus/silk/fixed/prefilter_FIX.c
new file mode 100644
index 0000000000..0b027eb836
--- /dev/null
+++ b/drivers/opus/silk/fixed/prefilter_FIX.c
@@ -0,0 +1,209 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Prefilter for finding Quantizer input signal */
+static OPUS_INLINE void silk_prefilt_FIX(
+    silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
+    opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
+    opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
+    opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coeficients        */
+    opus_int                    Tilt_Q14,                   /* I    Tilt shaping coeficient             */
+    opus_int32                  LF_shp_Q14,                 /* I    Low-frequancy shaping coeficients   */
+    opus_int                    lag,                        /* I    Lag for harmonic shaping            */
+    opus_int                    length                      /* I    Length of signals                   */
+);
+
+void silk_warped_LPC_analysis_filter_FIX(
+          opus_int32            state[],                    /* I/O  State [order + 1]                   */
+          opus_int32            res_Q2[],                   /* O    Residual signal [length]            */
+    const opus_int16            coef_Q13[],                 /* I    Coefficients [order]                */
+    const opus_int16            input[],                    /* I    Input signal [length]               */
+    const opus_int16            lambda_Q16,                 /* I    Warping factor                      */
+    const opus_int              length,                     /* I    Length of input signal              */
+    const opus_int              order                       /* I    Filter order (even)                 */
+)
+{
+    opus_int     n, i;
+    opus_int32   acc_Q11, tmp1, tmp2;
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+
+    for( n = 0; n < length; n++ ) {
+        /* Output of lowpass section */
+        tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+        state[ 0 ] = silk_LSHIFT( input[ n ], 14 );
+        /* Output of allpass section */
+        tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+        state[ 1 ] = tmp2;
+        acc_Q11 = silk_RSHIFT( order, 1 );
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+        /* Loop over allpass sections */
+        for( i = 2; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+            state[ i ] = tmp1;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+            state[ i + 1 ] = tmp2;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+        }
+        state[ order ] = tmp1;
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+        res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+    }
+}
+
+void silk_prefilter_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    const silk_encoder_control_FIX  *psEncCtrl,                             /* I    Encoder control                                                             */
+    opus_int32                      xw_Q3[],                                /* O    Weighted signal                                                             */
+    const opus_int16                x[]                                     /* I    Speech signal                                                               */
+)
+{
+    silk_prefilter_state_FIX *P = &psEnc->sPrefilt;
+    opus_int   j, k, lag;
+    opus_int32 tmp_32;
+    const opus_int16 *AR1_shp_Q13;
+    const opus_int16 *px;
+    opus_int32 *pxw_Q3;
+    opus_int   HarmShapeGain_Q12, Tilt_Q14;
+    opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;
+    VARDECL( opus_int32, x_filt_Q12 );
+    VARDECL( opus_int32, st_res_Q2 );
+    opus_int16 B_Q10[ 2 ];
+    SAVE_STACK;
+
+    /* Set up pointers */
+    px  = x;
+    pxw_Q3 = xw_Q3;
+    lag = P->lagPrev;
+    ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );
+    ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Update Variables that change per sub frame */
+        if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+            lag = psEncCtrl->pitchL[ k ];
+        }
+
+        /* Noise shape parameters */
+        HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] );
+        silk_assert( HarmShapeGain_Q12 >= 0 );
+        HarmShapeFIRPacked_Q12  =                          silk_RSHIFT( HarmShapeGain_Q12, 2 );
+        HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 );
+        Tilt_Q14    = psEncCtrl->Tilt_Q14[   k ];
+        LF_shp_Q14  = psEncCtrl->LF_shp_Q14[ k ];
+        AR1_shp_Q13 = &psEncCtrl->AR1_Q13[   k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Short term FIR filtering*/
+        silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
+            psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+
+        /* Reduce (mainly) low frequencies during harmonic emphasis */
+        B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 );
+        tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 );   /* Q26 */
+        tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) );    /* Q26 */
+        tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] );                                                /* Q24 */
+        tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 );                                                                     /* Q10 */
+        B_Q10[ 1 ]= silk_SAT16( tmp_32 );
+        x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] );
+        for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
+            x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] );
+        }
+        P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ];
+
+        silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length );
+
+        px  += psEnc->sCmn.subfr_length;
+        pxw_Q3 += psEnc->sCmn.subfr_length;
+    }
+
+    P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    RESTORE_STACK;
+}
+
+/* Prefilter for finding Quantizer input signal */
+static OPUS_INLINE void silk_prefilt_FIX(
+    silk_prefilter_state_FIX    *P,                         /* I/O  state                               */
+    opus_int32                  st_res_Q12[],               /* I    short term residual signal          */
+    opus_int32                  xw_Q3[],                    /* O    prefiltered signal                  */
+    opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coeficients        */
+    opus_int                    Tilt_Q14,                   /* I    Tilt shaping coeficient             */
+    opus_int32                  LF_shp_Q14,                 /* I    Low-frequancy shaping coeficients   */
+    opus_int                    lag,                        /* I    Lag for harmonic shaping            */
+    opus_int                    length                      /* I    Length of signals                   */
+)
+{
+    opus_int   i, idx, LTP_shp_buf_idx;
+    opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
+    opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
+    opus_int16 *LTP_shp_buf;
+
+    /* To speed up use temp variables instead of using the struct */
+    LTP_shp_buf     = P->sLTP_shp;
+    LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+    sLF_AR_shp_Q12  = P->sLF_AR_shp_Q12;
+    sLF_MA_shp_Q12  = P->sLF_MA_shp_Q12;
+
+    for( i = 0; i < length; i++ ) {
+        if( lag > 0 ) {
+            /* unrolled loop */
+            silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+            idx = lag + LTP_shp_buf_idx;
+            n_LTP_Q12 = silk_SMULBB(            LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2    ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+        } else {
+            n_LTP_Q12 = 0;
+        }
+
+        n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+        n_LF_Q10   = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+        sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+        sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12,  silk_LSHIFT( n_LF_Q10,   2 ) );
+
+        LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+        LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+        xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
+    }
+
+    /* Copy temp variable back to state */
+    P->sLF_AR_shp_Q12   = sLF_AR_shp_Q12;
+    P->sLF_MA_shp_Q12   = sLF_MA_shp_Q12;
+    P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
diff --git a/drivers/opus/silk/fixed/process_gains_FIX.c b/drivers/opus/silk/fixed/process_gains_FIX.c
new file mode 100644
index 0000000000..3a78c475bb
--- /dev/null
+++ b/drivers/opus/silk/fixed/process_gains_FIX.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "tuning_parameters.h"
+
+/* Processing of gains */
+void silk_process_gains_FIX(
+    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
+    silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Encoder control                                                             */
+    opus_int                        condCoding                              /* I    The type of conditional coding to use                                       */
+)
+{
+    silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+    opus_int     k;
+    opus_int32   s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10;
+
+    /* Gain reduction when LTP coding gain is high */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */
+        s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 );
+        }
+    }
+
+    /* Limit the quantized signal */
+    /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */
+    InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin(
+        silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length );
+
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Soft limit on ratio residual energy and squared gains */
+        ResNrg     = psEncCtrl->ResNrg[ k ];
+        ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 );
+        if( psEncCtrl->ResNrgQ[ k ] > 0 ) {
+            ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] );
+        } else {
+            if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) {
+                ResNrgPart = silk_int32_MAX;
+            } else {
+                ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] );
+            }
+        }
+        gain = psEncCtrl->Gains_Q16[ k ];
+        gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) );
+        if( gain_squared < silk_int16_MAX ) {
+            /* recalculate with higher precision */
+            gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain );
+            silk_assert( gain_squared > 0 );
+            gain = silk_SQRT_APPROX( gain_squared );                    /* Q8   */
+            gain = silk_min( gain, silk_int32_MAX >> 8 );
+            psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 );   /* Q16  */
+        } else {
+            gain = silk_SQRT_APPROX( gain_squared );                    /* Q0   */
+            gain = silk_min( gain, silk_int32_MAX >> 16 );
+            psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 16 );  /* Q16  */
+        }
+    }
+
+    /* Save unquantized gains and gain Index */
+    silk_memcpy( psEncCtrl->GainsUnq_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+    psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex;
+
+    /* Quantize gains */
+    silk_gains_quant( psEnc->sCmn.indices.GainsIndices, psEncCtrl->Gains_Q16,
+        &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+    /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        if( psEncCtrl->LTPredCodGain_Q7 + silk_RSHIFT( psEnc->sCmn.input_tilt_Q15, 8 ) > SILK_FIX_CONST( 1.0, 7 ) ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+    }
+
+    /* Quantizer boundary adjustment */
+    quant_offset_Q10 = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ];
+    psEncCtrl->Lambda_Q10 = SILK_FIX_CONST( LAMBDA_OFFSET, 10 )
+                          + silk_SMULBB( SILK_FIX_CONST( LAMBDA_DELAYED_DECISIONS, 10 ), psEnc->sCmn.nStatesDelayedDecision )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_SPEECH_ACT,        18 ), psEnc->sCmn.speech_activity_Q8     )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_INPUT_QUALITY,     12 ), psEncCtrl->input_quality_Q14       )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_CODING_QUALITY,    12 ), psEncCtrl->coding_quality_Q14      )
+                          + silk_SMULWB( SILK_FIX_CONST( LAMBDA_QUANT_OFFSET,      16 ), quant_offset_Q10                   );
+
+    silk_assert( psEncCtrl->Lambda_Q10 > 0 );
+    silk_assert( psEncCtrl->Lambda_Q10 < SILK_FIX_CONST( 2, 10 ) );
+}
diff --git a/drivers/opus/silk/fixed/regularize_correlations_FIX.c b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
new file mode 100644
index 0000000000..a3378fdd17
--- /dev/null
+++ b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FIX(
+    opus_int32                      *XX,                                    /* I/O  Correlation matrices                                                        */
+    opus_int32                      *xx,                                    /* I/O  Correlation values                                                          */
+    opus_int32                      noise,                                  /* I    Noise to add                                                                */
+    opus_int                        D                                       /* I    Dimension of XX                                                             */
+)
+{
+    opus_int i;
+    for( i = 0; i < D; i++ ) {
+        matrix_ptr( &XX[ 0 ], i, i, D ) = silk_ADD32( matrix_ptr( &XX[ 0 ], i, i, D ), noise );
+    }
+    xx[ 0 ] += noise;
+}
diff --git a/drivers/opus/silk/fixed/residual_energy16_FIX.c b/drivers/opus/silk/fixed/residual_energy16_FIX.c
new file mode 100644
index 0000000000..39bdff2a72
--- /dev/null
+++ b/drivers/opus/silk/fixed/residual_energy16_FIX.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+opus_int32 silk_residual_energy16_covar_FIX(
+    const opus_int16                *c,                                     /* I    Prediction vector                                                           */
+    const opus_int32                *wXX,                                   /* I    Correlation matrix                                                          */
+    const opus_int32                *wXx,                                   /* I    Correlation vector                                                          */
+    opus_int32                      wxx,                                    /* I    Signal energy                                                               */
+    opus_int                        D,                                      /* I    Dimension                                                                   */
+    opus_int                        cQ                                      /* I    Q value for c vector 0 - 15                                                 */
+)
+{
+    opus_int   i, j, lshifts, Qxtra;
+    opus_int32 c_max, w_max, tmp, tmp2, nrg;
+    opus_int   cn[ MAX_MATRIX_SIZE ];
+    const opus_int32 *pRow;
+
+    /* Safety checks */
+    silk_assert( D >=  0 );
+    silk_assert( D <= 16 );
+    silk_assert( cQ >  0 );
+    silk_assert( cQ < 16 );
+
+    lshifts = 16 - cQ;
+    Qxtra = lshifts;
+
+    c_max = 0;
+    for( i = 0; i < D; i++ ) {
+        c_max = silk_max_32( c_max, silk_abs( (opus_int32)c[ i ] ) );
+    }
+    Qxtra = silk_min_int( Qxtra, silk_CLZ32( c_max ) - 17 );
+
+    w_max = silk_max_32( wXX[ 0 ], wXX[ D * D - 1 ] );
+    Qxtra = silk_min_int( Qxtra, silk_CLZ32( silk_MUL( D, silk_RSHIFT( silk_SMULWB( w_max, c_max ), 4 ) ) ) - 5 );
+    Qxtra = silk_max_int( Qxtra, 0 );
+    for( i = 0; i < D; i++ ) {
+        cn[ i ] = silk_LSHIFT( ( opus_int )c[ i ], Qxtra );
+        silk_assert( silk_abs(cn[i]) <= ( silk_int16_MAX + 1 ) ); /* Check that silk_SMLAWB can be used */
+    }
+    lshifts -= Qxtra;
+
+    /* Compute wxx - 2 * wXx * c */
+    tmp = 0;
+    for( i = 0; i < D; i++ ) {
+        tmp = silk_SMLAWB( tmp, wXx[ i ], cn[ i ] );
+    }
+    nrg = silk_RSHIFT( wxx, 1 + lshifts ) - tmp;                         /* Q: -lshifts - 1 */
+
+    /* Add c' * wXX * c, assuming wXX is symmetric */
+    tmp2 = 0;
+    for( i = 0; i < D; i++ ) {
+        tmp = 0;
+        pRow = &wXX[ i * D ];
+        for( j = i + 1; j < D; j++ ) {
+            tmp = silk_SMLAWB( tmp, pRow[ j ], cn[ j ] );
+        }
+        tmp  = silk_SMLAWB( tmp,  silk_RSHIFT( pRow[ i ], 1 ), cn[ i ] );
+        tmp2 = silk_SMLAWB( tmp2, tmp,                        cn[ i ] );
+    }
+    nrg = silk_ADD_LSHIFT32( nrg, tmp2, lshifts );                       /* Q: -lshifts - 1 */
+
+    /* Keep one bit free always, because we add them for LSF interpolation */
+    if( nrg < 1 ) {
+        nrg = 1;
+    } else if( nrg > silk_RSHIFT( silk_int32_MAX, lshifts + 2 ) ) {
+        nrg = silk_int32_MAX >> 1;
+    } else {
+        nrg = silk_LSHIFT( nrg, lshifts + 1 );                           /* Q0 */
+    }
+    return nrg;
+
+}
diff --git a/drivers/opus/silk/fixed/residual_energy_FIX.c b/drivers/opus/silk/fixed/residual_energy_FIX.c
new file mode 100644
index 0000000000..13dbc51e39
--- /dev/null
+++ b/drivers/opus/silk/fixed/residual_energy_FIX.c
@@ -0,0 +1,97 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FIX(
+          opus_int32                nrgs[ MAX_NB_SUBFR ],                   /* O    Residual energy per subframe                                                */
+          opus_int                  nrgsQ[ MAX_NB_SUBFR ],                  /* O    Q value per subframe                                                        */
+    const opus_int16                x[],                                    /* I    Input signal                                                                */
+          opus_int16                a_Q12[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                                                */
+    const opus_int32                gains[ MAX_NB_SUBFR ],                  /* I    Quantization gains                                                          */
+    const opus_int                  subfr_length,                           /* I    Subframe length                                                             */
+    const opus_int                  nb_subfr,                               /* I    Number of subframes                                                         */
+    const opus_int                  LPC_order                               /* I    LPC order                                                                   */
+)
+{
+    opus_int         offset, i, j, rshift, lz1, lz2;
+    opus_int16       *LPC_res_ptr;
+    VARDECL( opus_int16, LPC_res );
+    const opus_int16 *x_ptr;
+    opus_int32       tmp32;
+    SAVE_STACK;
+
+    x_ptr  = x;
+    offset = LPC_order + subfr_length;
+
+    /* Filter input to create the LPC residual for each frame half, and measure subframe energies */
+    ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 );
+    silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
+    for( i = 0; i < nb_subfr >> 1; i++ ) {
+        /* Calculate half frame LPC residual signal including preceding samples */
+        silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );
+
+        /* Point to first subframe of the just calculated LPC residual signal */
+        LPC_res_ptr = LPC_res + LPC_order;
+        for( j = 0; j < ( MAX_NB_SUBFR >> 1 ); j++ ) {
+            /* Measure subframe energy */
+            silk_sum_sqr_shift( &nrgs[ i * ( MAX_NB_SUBFR >> 1 ) + j ], &rshift, LPC_res_ptr, subfr_length );
+
+            /* Set Q values for the measured energy */
+            nrgsQ[ i * ( MAX_NB_SUBFR >> 1 ) + j ] = -rshift;
+
+            /* Move to next subframe */
+            LPC_res_ptr += offset;
+        }
+        /* Move to next frame half */
+        x_ptr += ( MAX_NB_SUBFR >> 1 ) * offset;
+    }
+
+    /* Apply the squared subframe gains */
+    for( i = 0; i < nb_subfr; i++ ) {
+        /* Fully upscale gains and energies */
+        lz1 = silk_CLZ32( nrgs[  i ] ) - 1;
+        lz2 = silk_CLZ32( gains[ i ] ) - 1;
+
+        tmp32 = silk_LSHIFT32( gains[ i ], lz2 );
+
+        /* Find squared gains */
+        tmp32 = silk_SMMUL( tmp32, tmp32 ); /* Q( 2 * lz2 - 32 )*/
+
+        /* Scale energies */
+        nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 )*/
+        nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32;
+    }
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/schur64_FIX.c b/drivers/opus/silk/fixed/schur64_FIX.c
new file mode 100644
index 0000000000..22c0952ffd
--- /dev/null
+++ b/drivers/opus/silk/fixed/schur64_FIX.c
@@ -0,0 +1,92 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Slower than schur(), but more accurate.                              */
+/* Uses SMULL(), available on armv4                                     */
+opus_int32 silk_schur64(                            /* O    returns residual energy                                     */
+    opus_int32                  rc_Q16[],           /* O    Reflection coefficients [order] Q16                         */
+    const opus_int32            c[],                /* I    Correlations [order+1]                                      */
+    opus_int32                  order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k, n;
+    opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Check for invalid input */
+    if( c[ 0 ] <= 0 ) {
+        silk_memset( rc_Q16, 0, order * sizeof( opus_int32 ) );
+        return 0;
+    }
+
+    for( k = 0; k < order + 1; k++ ) {
+        C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Check that we won't be getting an unstable rc, otherwise stop here. */
+        if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+           if ( C[ k + 1 ][ 0 ] > 0 ) {
+              rc_Q16[ k ] = -SILK_FIX_CONST( .99f, 16 );
+           } else {
+              rc_Q16[ k ] = SILK_FIX_CONST( .99f, 16 );
+           }
+           k++;
+           break;
+        }
+
+        /* Get reflection coefficient: divide two Q30 values and get result in Q31 */
+        rc_tmp_Q31 = silk_DIV32_varQ( -C[ k + 1 ][ 0 ], C[ 0 ][ 1 ], 31 );
+
+        /* Save the output */
+        rc_Q16[ k ] = silk_RSHIFT_ROUND( rc_tmp_Q31, 15 );
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1_Q30 = C[ n + k + 1 ][ 0 ];
+            Ctmp2_Q30 = C[ n ][ 1 ];
+
+            /* Multiply and add the highest int32 */
+            C[ n + k + 1 ][ 0 ] = Ctmp1_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp2_Q30, 1 ), rc_tmp_Q31 );
+            C[ n ][ 1 ]         = Ctmp2_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp1_Q30, 1 ), rc_tmp_Q31 );
+        }
+    }
+
+    for(; k < order; k++ ) {
+       rc_Q16[ k ] = 0;
+    }
+
+    return silk_max_32( 1, C[ 0 ][ 1 ] );
+}
diff --git a/drivers/opus/silk/fixed/schur_FIX.c b/drivers/opus/silk/fixed/schur_FIX.c
new file mode 100644
index 0000000000..e8b24cf068
--- /dev/null
+++ b/drivers/opus/silk/fixed/schur_FIX.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Faster than schur64(), but much less accurate.                       */
+/* uses SMLAWB(), requiring armv5E and higher.                          */
+opus_int32 silk_schur(                              /* O    Returns residual energy                                     */
+    opus_int16                  *rc_Q15,            /* O    reflection coefficients [order] Q15                         */
+    const opus_int32            *c,                 /* I    correlations [order+1]                                      */
+    const opus_int32            order               /* I    prediction order                                            */
+)
+{
+    opus_int        k, n, lz;
+    opus_int32    C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    opus_int32    Ctmp1, Ctmp2, rc_tmp_Q15;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Get number of leading zeros */
+    lz = silk_CLZ32( c[ 0 ] );
+
+    /* Copy correlations and adjust level to Q30 */
+    if( lz < 2 ) {
+        /* lz must be 1, so shift one to the right */
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 );
+        }
+    } else if( lz > 2 ) {
+        /* Shift to the left */
+        lz -= 2;
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz );
+        }
+    } else {
+        /* No need to shift */
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
+        }
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Check that we won't be getting an unstable rc, otherwise stop here. */
+        if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+           if ( C[ k + 1 ][ 0 ] > 0 ) {
+              rc_Q15[ k ] = -SILK_FIX_CONST( .99f, 15 );
+           } else {
+              rc_Q15[ k ] = SILK_FIX_CONST( .99f, 15 );
+           }
+           k++;
+           break;
+        }
+
+        /* Get reflection coefficient */
+        rc_tmp_Q15 = -silk_DIV32_16( C[ k + 1 ][ 0 ], silk_max_32( silk_RSHIFT( C[ 0 ][ 1 ], 15 ), 1 ) );
+
+        /* Clip (shouldn't happen for properly conditioned inputs) */
+        rc_tmp_Q15 = silk_SAT16( rc_tmp_Q15 );
+
+        /* Store */
+        rc_Q15[ k ] = (opus_int16)rc_tmp_Q15;
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1 = C[ n + k + 1 ][ 0 ];
+            Ctmp2 = C[ n ][ 1 ];
+            C[ n + k + 1 ][ 0 ] = silk_SMLAWB( Ctmp1, silk_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 );
+            C[ n ][ 1 ]         = silk_SMLAWB( Ctmp2, silk_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 );
+        }
+    }
+
+    for(; k < order; k++ ) {
+       rc_Q15[ k ] = 0;
+    }
+
+    /* return residual energy */
+    return silk_max_32( 1, C[ 0 ][ 1 ] );
+}
diff --git a/drivers/opus/silk/fixed/solve_LS_FIX.c b/drivers/opus/silk/fixed/solve_LS_FIX.c
new file mode 100644
index 0000000000..5d09284935
--- /dev/null
+++ b/drivers/opus/silk/fixed/solve_LS_FIX.c
@@ -0,0 +1,249 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/*****************************/
+/* Internal function headers */
+/*****************************/
+
+typedef struct {
+    opus_int32 Q36_part;
+    opus_int32 Q48_part;
+} inv_D_t;
+
+/* Factorize square matrix A into LDL form */
+static OPUS_INLINE void silk_LDL_factorize_FIX(
+    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
+    opus_int            M,          /* I   Size of Matrix                                               */
+    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
+    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
+);
+
+/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    opus_int            M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+);
+
+/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    const opus_int      M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+);
+
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
+    opus_int32          T[],        /* I/O  Numenator vector                                            */
+    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
+    opus_int            M           /* I    dimension                                                   */
+);
+
+/* Solves Ax = b, assuming A is symmetric */
+void silk_solve_LDL_FIX(
+    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */
+    opus_int                        M,                                      /* I    Size of matrix                                                              */
+    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */
+    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */
+)
+{
+    VARDECL( opus_int32, L_Q16 );
+    opus_int32 Y[      MAX_MATRIX_SIZE ];
+    inv_D_t   inv_D[  MAX_MATRIX_SIZE ];
+    SAVE_STACK;
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+    ALLOC( L_Q16, M * M, opus_int32 );
+
+    /***************************************************
+    Factorize A by LDL such that A = L*D*L',
+    where L is lower triangular with ones on diagonal
+    ****************************************************/
+    silk_LDL_factorize_FIX( A, M, L_Q16, inv_D );
+
+    /****************************************************
+    * substitute D*L'*x = Y. ie:
+    L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b
+    ******************************************************/
+    silk_LS_SolveFirst_FIX( L_Q16, M, b, Y );
+
+    /****************************************************
+    D*L'*x = Y <=> L'*x = inv(D)*Y, because D is
+    diagonal just multiply with 1/d_i
+    ****************************************************/
+    silk_LS_divide_Q16_FIX( Y, inv_D, M );
+
+    /****************************************************
+    x = inv(L') * inv(D) * Y
+    *****************************************************/
+    silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
+    RESTORE_STACK;
+}
+
+static OPUS_INLINE void silk_LDL_factorize_FIX(
+    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
+    opus_int            M,          /* I   Size of Matrix                                               */
+    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
+    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
+)
+{
+    opus_int   i, j, k, status, loop_count;
+    const opus_int32 *ptr1, *ptr2;
+    opus_int32 diag_min_value, tmp_32, err;
+    opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
+    opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+
+    status = 1;
+    diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 );
+    for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) {
+        status = 0;
+        for( j = 0; j < M; j++ ) {
+            ptr1 = matrix_adr( L_Q16, j, 0, M );
+            tmp_32 = 0;
+            for( i = 0; i < j; i++ ) {
+                v_Q0[ i ] = silk_SMULWW(         D_Q0[ i ], ptr1[ i ] ); /* Q0 */
+                tmp_32    = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
+            }
+            tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 );
+
+            if( tmp_32 < diag_min_value ) {
+                tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 );
+                /* Matrix not positive semi-definite, or ill conditioned */
+                for( i = 0; i < M; i++ ) {
+                    matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
+                }
+                status = 1;
+                break;
+            }
+            D_Q0[ j ] = tmp_32;                         /* always < max(Correlation) */
+
+            /* two-step division */
+            one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 );                    /* Q36 */
+            one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 );                   /* Q40 */
+            err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) );     /* Q24 */
+            one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 );                 /* Q48 */
+
+            /* Save 1/Ds */
+            inv_D[ j ].Q36_part = one_div_diag_Q36;
+            inv_D[ j ].Q48_part = one_div_diag_Q48;
+
+            matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */
+            ptr1 = matrix_adr( A, j, 0, M );
+            ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
+            for( i = j + 1; i < M; i++ ) {
+                tmp_32 = 0;
+                for( k = 0; k < j; k++ ) {
+                    tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */
+                }
+                tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */
+
+                /* tmp_32 / D_Q0[j] : Divide to Q16 */
+                matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),
+                    silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
+
+                /* go to next column */
+                ptr2 += M;
+            }
+        }
+    }
+
+    silk_assert( status == 0 );
+}
+
+static OPUS_INLINE void silk_LS_divide_Q16_FIX(
+    opus_int32          T[],        /* I/O  Numenator vector                                            */
+    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
+    opus_int            M           /* I    dimension                                                   */
+)
+{
+    opus_int   i;
+    opus_int32 tmp_32;
+    opus_int32 one_div_diag_Q36, one_div_diag_Q48;
+
+    for( i = 0; i < M; i++ ) {
+        one_div_diag_Q36 = inv_D[ i ].Q36_part;
+        one_div_diag_Q48 = inv_D[ i ].Q48_part;
+
+        tmp_32 = T[ i ];
+        T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
+    }
+}
+
+/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveFirst_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    opus_int            M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+)
+{
+    opus_int i, j;
+    const opus_int32 *ptr32;
+    opus_int32 tmp_32;
+
+    for( i = 0; i < M; i++ ) {
+        ptr32 = matrix_adr( L_Q16, i, 0, M );
+        tmp_32 = 0;
+        for( j = 0; j < i; j++ ) {
+            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );
+        }
+        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
+    }
+}
+
+/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
+static OPUS_INLINE void silk_LS_SolveLast_FIX(
+    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
+    const opus_int      M,          /* I    Dim of Matrix equation                                      */
+    const opus_int32    *b,         /* I    b Vector                                                    */
+    opus_int32          *x_Q16      /* O    x Vector                                                    */
+)
+{
+    opus_int i, j;
+    const opus_int32 *ptr32;
+    opus_int32 tmp_32;
+
+    for( i = M - 1; i >= 0; i-- ) {
+        ptr32 = matrix_adr( L_Q16, 0, i, M );
+        tmp_32 = 0;
+        for( j = M - 1; j > i; j-- ) {
+            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] );
+        }
+        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
+    }
+}
diff --git a/drivers/opus/silk/fixed/structs_FIX.h b/drivers/opus/silk/fixed/structs_FIX.h
new file mode 100644
index 0000000000..0284dfa27a
--- /dev/null
+++ b/drivers/opus/silk/fixed/structs_FIX.h
@@ -0,0 +1,133 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_FIX_H
+#define SILK_STRUCTS_FIX_H
+
+#include "typedef.h"
+#include "silk_main.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/********************************/
+/* Noise shaping analysis state */
+/********************************/
+typedef struct {
+    opus_int8                   LastGainIndex;
+    opus_int32                  HarmBoost_smth_Q16;
+    opus_int32                  HarmShapeGain_smth_Q16;
+    opus_int32                  Tilt_smth_Q16;
+} silk_shape_state_FIX;
+
+/********************************/
+/* Prefilter state              */
+/********************************/
+typedef struct {
+    opus_int16                  sLTP_shp[ LTP_BUF_LENGTH ];
+    opus_int32                  sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ];
+    opus_int                    sLTP_shp_buf_idx;
+    opus_int32                  sLF_AR_shp_Q12;
+    opus_int32                  sLF_MA_shp_Q12;
+    opus_int32                  sHarmHP_Q2;
+    opus_int32                  rand_seed;
+    opus_int                    lagPrev;
+} silk_prefilter_state_FIX;
+
+/********************************/
+/* Encoder state FIX            */
+/********************************/
+typedef struct {
+    silk_encoder_state          sCmn;                                   /* Common struct, shared with floating-point code       */
+    silk_shape_state_FIX        sShape;                                 /* Shape state                                          */
+    silk_prefilter_state_FIX    sPrefilt;                               /* Prefilter State                                      */
+
+    /* Buffer for find pitch and noise shape analysis */
+    silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis  */
+    opus_int                    LTPCorr_Q15;                            /* Normalized correlation from pitch lag estimator      */
+} silk_encoder_state_FIX;
+
+/************************/
+/* Encoder control FIX  */
+/************************/
+typedef struct {
+    /* Prediction and coding parameters */
+    opus_int32                  Gains_Q16[ MAX_NB_SUBFR ];
+    silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+    opus_int16                  LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ];
+    opus_int                    LTP_scale_Q14;
+    opus_int                    pitchL[ MAX_NB_SUBFR ];
+
+    /* Noise shaping parameters */
+    /* Testing */
+    silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    opus_int32                  LF_shp_Q14[        MAX_NB_SUBFR ];      /* Packs two int16 coefficients per int32 value         */
+    opus_int                    GainsPre_Q14[      MAX_NB_SUBFR ];
+    opus_int                    HarmBoost_Q14[     MAX_NB_SUBFR ];
+    opus_int                    Tilt_Q14[          MAX_NB_SUBFR ];
+    opus_int                    HarmShapeGain_Q14[ MAX_NB_SUBFR ];
+    opus_int                    Lambda_Q10;
+    opus_int                    input_quality_Q14;
+    opus_int                    coding_quality_Q14;
+
+    /* measures */
+    opus_int                    sparseness_Q8;
+    opus_int32                  predGain_Q16;
+    opus_int                    LTPredCodGain_Q7;
+    opus_int32                  ResNrg[ MAX_NB_SUBFR ];                 /* Residual energy per subframe                         */
+    opus_int                    ResNrgQ[ MAX_NB_SUBFR ];                /* Q domain for the residual energy > 0                 */
+
+    /* Parameters for CBR mode */
+    opus_int32                  GainsUnq_Q16[ MAX_NB_SUBFR ];
+    opus_int8                   lastGainIndexPrev;
+} silk_encoder_control_FIX;
+
+/************************/
+/* Encoder Super Struct */
+/************************/
+typedef struct {
+    silk_encoder_state_FIX      state_Fxx[ ENCODER_NUM_CHANNELS ];
+    stereo_enc_state            sStereo;
+    opus_int32                  nBitsExceeded;
+    opus_int                    nChannelsAPI;
+    opus_int                    nChannelsInternal;
+    opus_int                    nPrevChannelsInternal;
+    opus_int                    timeSinceSwitchAllowed_ms;
+    opus_int                    allowBandwidthSwitch;
+    opus_int                    prev_decode_only_middle;
+} silk_encoder;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/fixed/vector_ops_FIX.c b/drivers/opus/silk/fixed/vector_ops_FIX.c
new file mode 100644
index 0000000000..b1e422eb91
--- /dev/null
+++ b/drivers/opus/silk/fixed/vector_ops_FIX.c
@@ -0,0 +1,96 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Copy and multiply a vector by a constant */
+void silk_scale_copy_vector16(
+    opus_int16                  *data_out,
+    const opus_int16            *data_in,
+    opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
+    const opus_int              dataSize            /* I    Length                                                      */
+)
+{
+    opus_int  i;
+    opus_int32 tmp32;
+
+    for( i = 0; i < dataSize; i++ ) {
+        tmp32 = silk_SMULWB( gain_Q16, data_in[ i ] );
+        data_out[ i ] = (opus_int16)silk_CHECK_FIT16( tmp32 );
+    }
+}
+
+/* Multiply a vector by a constant */
+void silk_scale_vector32_Q26_lshift_18(
+    opus_int32                  *data1,             /* I/O  Q0/Q18                                                      */
+    opus_int32                  gain_Q26,           /* I    Q26                                                         */
+    opus_int                    dataSize            /* I    length                                                      */
+)
+{
+    opus_int  i;
+
+    for( i = 0; i < dataSize; i++ ) {
+        data1[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( silk_SMULL( data1[ i ], gain_Q26 ), 8 ) );    /* OUTPUT: Q18 */
+    }
+}
+
+/* sum = for(i=0;i<len;i++)inVec1[i]*inVec2[i];      ---        inner product   */
+/* Note for ARM asm:                                                            */
+/*        * inVec1 and inVec2 should be at least 2 byte aligned.                */
+/*        * len should be positive 16bit integer.                               */
+/*        * only when len>6, memory access can be reduced by half.              */
+opus_int32 silk_inner_prod_aligned(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int   i;
+    opus_int32 sum = 0;
+    for( i = 0; i < len; i++ ) {
+        sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
+    }
+    return sum;
+}
+
+opus_int64 silk_inner_prod16_aligned_64(
+    const opus_int16            *inVec1,            /*    I input vector 1                                              */
+    const opus_int16            *inVec2,            /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int   i;
+    opus_int64 sum = 0;
+    for( i = 0; i < len; i++ ) {
+        sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] );
+    }
+    return sum;
+}
diff --git a/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
new file mode 100644
index 0000000000..3f04df775c
--- /dev/null
+++ b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+#define QC  10
+#define QS  14
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FIX(
+          opus_int32                *corr,                                  /* O    Result [order + 1]                                                          */
+          opus_int                  *scale,                                 /* O    Scaling of the correlation vector                                           */
+    const opus_int16                *input,                                 /* I    Input data to correlate                                                     */
+    const opus_int                  warping_Q16,                            /* I    Warping coefficient                                                         */
+    const opus_int                  length,                                 /* I    Length of input                                                             */
+    const opus_int                  order                                   /* I    Correlation order (even)                                                    */
+)
+{
+    opus_int   n, i, lsh;
+    opus_int32 tmp1_QS, tmp2_QS;
+    opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+    opus_int64 corr_QC[  MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+    silk_assert( 2 * QS - QC >= 0 );
+
+    /* Loop over samples */
+    for( n = 0; n < length; n++ ) {
+        tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+        /* Loop over allpass sections */
+        for( i = 0; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+            state_QS[ i ]  = tmp1_QS;
+            corr_QC[  i ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC );
+            /* Output of allpass section */
+            tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+            state_QS[ i + 1 ]  = tmp2_QS;
+            corr_QC[  i + 1 ] += silk_RSHIFT64( silk_SMULL( tmp2_QS, state_QS[ 0 ] ), 2 * QS - QC );
+        }
+        state_QS[ order ] = tmp1_QS;
+        corr_QC[  order ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC );
+    }
+
+    lsh = silk_CLZ64( corr_QC[ 0 ] ) - 35;
+    lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC );
+    *scale = -( QC + lsh );
+    silk_assert( *scale >= -30 && *scale <= 12 );
+    if( lsh >= 0 ) {
+        for( i = 0; i < order + 1; i++ ) {
+            corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QC[ i ], lsh ) );
+        }
+    } else {
+        for( i = 0; i < order + 1; i++ ) {
+            corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QC[ i ], -lsh ) );
+        }
+    }
+    silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
+}
diff --git a/drivers/opus/silk/float/LPC_analysis_filter_FLP.c b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c
new file mode 100644
index 0000000000..8d26c093bf
--- /dev/null
+++ b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c
@@ -0,0 +1,249 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdlib.h>
+#include "main_FLP.h"
+
+/************************************************/
+/* LPC analysis filter                          */
+/* NB! State is kept internally and the         */
+/* filter always starts with zero state         */
+/* first Order output samples are set to zero   */
+/************************************************/
+
+/* 16th order LPC analysis filter, does not write first 16 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter16_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 16; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ]  * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ]  * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ]  * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ]  * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ]  * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ]  * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ]  * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ]  * PredCoef[ 7 ]  +
+                   s_ptr[ -8 ]  * PredCoef[ 8 ]  +
+                   s_ptr[ -9 ]  * PredCoef[ 9 ]  +
+                   s_ptr[ -10 ] * PredCoef[ 10 ] +
+                   s_ptr[ -11 ] * PredCoef[ 11 ] +
+                   s_ptr[ -12 ] * PredCoef[ 12 ] +
+                   s_ptr[ -13 ] * PredCoef[ 13 ] +
+                   s_ptr[ -14 ] * PredCoef[ 14 ] +
+                   s_ptr[ -15 ] * PredCoef[ 15 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 12th order LPC analysis filter, does not write first 12 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter12_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 12; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ]  * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ]  * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ]  * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ]  * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ]  * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ]  * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ]  * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ]  * PredCoef[ 7 ]  +
+                   s_ptr[ -8 ]  * PredCoef[ 8 ]  +
+                   s_ptr[ -9 ]  * PredCoef[ 9 ]  +
+                   s_ptr[ -10 ] * PredCoef[ 10 ] +
+                   s_ptr[ -11 ] * PredCoef[ 11 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 10th order LPC analysis filter, does not write first 10 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter10_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 10; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ] * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ] * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ] * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ] * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ] * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ] * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ] * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ] * PredCoef[ 7 ]  +
+                   s_ptr[ -8 ] * PredCoef[ 8 ]  +
+                   s_ptr[ -9 ] * PredCoef[ 9 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 8th order LPC analysis filter, does not write first 8 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter8_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 8; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ] * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ] * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ] * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ] * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ] * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ] * PredCoef[ 5 ]  +
+                   s_ptr[ -6 ] * PredCoef[ 6 ]  +
+                   s_ptr[ -7 ] * PredCoef[ 7 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/* 6th order LPC analysis filter, does not write first 6 samples */
+static OPUS_INLINE void silk_LPC_analysis_filter6_FLP(
+          silk_float                 r_LPC[],            /* O    LPC residual signal                     */
+    const silk_float                 PredCoef[],         /* I    LPC coefficients                        */
+    const silk_float                 s[],                /* I    Input signal                            */
+    const opus_int                   length              /* I    Length of input signal                  */
+)
+{
+    opus_int   ix;
+    silk_float LPC_pred;
+    const silk_float *s_ptr;
+
+    for( ix = 6; ix < length; ix++ ) {
+        s_ptr = &s[ix - 1];
+
+        /* short-term prediction */
+        LPC_pred = s_ptr[  0 ] * PredCoef[ 0 ]  +
+                   s_ptr[ -1 ] * PredCoef[ 1 ]  +
+                   s_ptr[ -2 ] * PredCoef[ 2 ]  +
+                   s_ptr[ -3 ] * PredCoef[ 3 ]  +
+                   s_ptr[ -4 ] * PredCoef[ 4 ]  +
+                   s_ptr[ -5 ] * PredCoef[ 5 ];
+
+        /* prediction error */
+        r_LPC[ix] = s_ptr[ 1 ] - LPC_pred;
+    }
+}
+
+/************************************************/
+/* LPC analysis filter                          */
+/* NB! State is kept internally and the         */
+/* filter always starts with zero state         */
+/* first Order output samples are set to zero   */
+/************************************************/
+void silk_LPC_analysis_filter_FLP(
+    silk_float                      r_LPC[],                            /* O    LPC residual signal                         */
+    const silk_float                PredCoef[],                         /* I    LPC coefficients                            */
+    const silk_float                s[],                                /* I    Input signal                                */
+    const opus_int                  length,                             /* I    Length of input signal                      */
+    const opus_int                  Order                               /* I    LPC order                                   */
+)
+{
+    silk_assert( Order <= length );
+
+    switch( Order ) {
+        case 6:
+            silk_LPC_analysis_filter6_FLP(  r_LPC, PredCoef, s, length );
+        break;
+
+        case 8:
+            silk_LPC_analysis_filter8_FLP(  r_LPC, PredCoef, s, length );
+        break;
+
+        case 10:
+            silk_LPC_analysis_filter10_FLP( r_LPC, PredCoef, s, length );
+        break;
+
+        case 12:
+            silk_LPC_analysis_filter12_FLP( r_LPC, PredCoef, s, length );
+        break;
+
+        case 16:
+            silk_LPC_analysis_filter16_FLP( r_LPC, PredCoef, s, length );
+        break;
+
+        default:
+            silk_assert( 0 );
+        break;
+    }
+
+    /* Set first Order output samples to zero */
+    silk_memset( r_LPC, 0, Order * sizeof( silk_float ) );
+}
+
diff --git a/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c
new file mode 100644
index 0000000000..968edfb189
--- /dev/null
+++ b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c
@@ -0,0 +1,76 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "SigProc_FLP.h"
+
+#define RC_THRESHOLD        0.9999f
+
+/* compute inverse of LPC prediction gain, and                          */
+/* test if LPC coefficients are stable (all poles within unit circle)   */
+/* this code is based on silk_a2k_FLP()                                 */
+silk_float silk_LPC_inverse_pred_gain_FLP(  /* O    return inverse prediction gain, energy domain               */
+    const silk_float    *A,                 /* I    prediction coefficients [order]                             */
+    opus_int32          order               /* I    prediction order                                            */
+)
+{
+    opus_int   k, n;
+    double     invGain, rc, rc_mult1, rc_mult2;
+    silk_float Atmp[ 2 ][ SILK_MAX_ORDER_LPC ];
+    silk_float *Aold, *Anew;
+
+    Anew = Atmp[ order & 1 ];
+    silk_memcpy( Anew, A, order * sizeof(silk_float) );
+
+    invGain = 1.0;
+    for( k = order - 1; k > 0; k-- ) {
+        rc = -Anew[ k ];
+        if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) {
+            return 0.0f;
+        }
+        rc_mult1 = 1.0f - rc * rc;
+        rc_mult2 = 1.0f / rc_mult1;
+        invGain *= rc_mult1;
+        /* swap pointers */
+        Aold = Anew;
+        Anew = Atmp[ k & 1 ];
+        for( n = 0; n < k; n++ ) {
+            Anew[ n ] = (silk_float)( ( Aold[ n ] - Aold[ k - n - 1 ] * rc ) * rc_mult2 );
+        }
+    }
+    rc = -Anew[ 0 ];
+    if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) {
+        return 0.0f;
+    }
+    rc_mult1 = 1.0f - rc * rc;
+    invGain *= rc_mult1;
+    return (silk_float)invGain;
+}
diff --git a/drivers/opus/silk/float/LTP_analysis_filter_FLP.c b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c
new file mode 100644
index 0000000000..fc729e99b1
--- /dev/null
+++ b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c
@@ -0,0 +1,75 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+void silk_LTP_analysis_filter_FLP(
+    silk_float                      *LTP_res,                           /* O    LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
+    const silk_float                *x,                                 /* I    Input signal, with preceding samples        */
+    const silk_float                B[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    LTP coefficients for each subframe          */
+    const opus_int                  pitchL[   MAX_NB_SUBFR ],           /* I    Pitch lags                                  */
+    const silk_float                invGains[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains                  */
+    const opus_int                  subfr_length,                       /* I    Length of each subframe                     */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  pre_length                          /* I    Preceding samples for each subframe         */
+)
+{
+    const silk_float *x_ptr, *x_lag_ptr;
+    silk_float   Btmp[ LTP_ORDER ];
+    silk_float   *LTP_res_ptr;
+    silk_float   inv_gain;
+    opus_int     k, i, j;
+
+    x_ptr = x;
+    LTP_res_ptr = LTP_res;
+    for( k = 0; k < nb_subfr; k++ ) {
+        x_lag_ptr = x_ptr - pitchL[ k ];
+        inv_gain = invGains[ k ];
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            Btmp[ i ] = B[ k * LTP_ORDER + i ];
+        }
+
+        /* LTP analysis FIR filter */
+        for( i = 0; i < subfr_length + pre_length; i++ ) {
+            LTP_res_ptr[ i ] = x_ptr[ i ];
+            /* Subtract long-term prediction */
+            for( j = 0; j < LTP_ORDER; j++ ) {
+                LTP_res_ptr[ i ] -= Btmp[ j ] * x_lag_ptr[ LTP_ORDER / 2 - j ];
+            }
+            LTP_res_ptr[ i ] *= inv_gain;
+            x_lag_ptr++;
+        }
+
+        /* Update pointers */
+        LTP_res_ptr += subfr_length + pre_length;
+        x_ptr       += subfr_length;
+    }
+}
diff --git a/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c
new file mode 100644
index 0000000000..60e1119d5a
--- /dev/null
+++ b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c
@@ -0,0 +1,52 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+void silk_LTP_scale_ctrl_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+)
+{
+    opus_int   round_loss;
+
+    if( condCoding == CODE_INDEPENDENTLY ) {
+        /* Only scale if first frame in packet */
+        round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket;
+        psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( round_loss * psEncCtrl->LTPredCodGain * 0.1f, 0.0f, 2.0f );
+    } else {
+        /* Default is minimum scaling */
+        psEnc->sCmn.indices.LTP_scaleIndex = 0;
+    }
+
+    psEncCtrl->LTP_scale = (silk_float)silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ] / 16384.0f;
+}
diff --git a/drivers/opus/silk/float/SigProc_FLP.h b/drivers/opus/silk/float/SigProc_FLP.h
new file mode 100644
index 0000000000..f0cb3733be
--- /dev/null
+++ b/drivers/opus/silk/float/SigProc_FLP.h
@@ -0,0 +1,204 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FLP_H
+#define SILK_SIGPROC_FLP_H
+
+#include "SigProc_FIX.h"
+#include "float_cast.h"
+#include <math.h>
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+/********************************************************************/
+/*                    SIGNAL PROCESSING FUNCTIONS                   */
+/********************************************************************/
+
+/* Chirp (bw expand) LP AR filter */
+void silk_bwexpander_FLP(
+    silk_float          *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int      d,                  /* I    length of ar                                                */
+    const silk_float    chirp               /* I    chirp factor (typically in range (0..1) )                   */
+);
+
+/* compute inverse of LPC prediction gain, and                          */
+/* test if LPC coefficients are stable (all poles within unit circle)   */
+/* this code is based on silk_FLP_a2k()                                 */
+silk_float silk_LPC_inverse_pred_gain_FLP(  /* O    return inverse prediction gain, energy domain               */
+    const silk_float    *A,                 /* I    prediction coefficients [order]                             */
+    opus_int32          order               /* I    prediction order                                            */
+);
+
+silk_float silk_schur_FLP(                  /* O    returns residual energy                                     */
+    silk_float          refl_coef[],        /* O    reflection coefficients (length order)                      */
+    const silk_float    auto_corr[],        /* I    autocorrelation sequence (length order+1)                   */
+    opus_int            order               /* I    order                                                       */
+);
+
+void silk_k2a_FLP(
+    silk_float          *A,                 /* O     prediction coefficients [order]                            */
+    const silk_float    *rc,                /* I     reflection coefficients [order]                            */
+    opus_int32          order               /* I     prediction order                                           */
+);
+
+/* Solve the normal equations using the Levinson-Durbin recursion */
+silk_float silk_levinsondurbin_FLP(         /* O    prediction error energy                                     */
+    silk_float          A[],                /* O    prediction coefficients [order]                             */
+    const silk_float    corr[],             /* I    input auto-correlations [order + 1]                         */
+    const opus_int      order               /* I    prediction order                                            */
+);
+
+/* compute autocorrelation */
+void silk_autocorrelation_FLP(
+    silk_float          *results,           /* O    result (length correlationCount)                            */
+    const silk_float    *inputData,         /* I    input data to correlate                                     */
+    opus_int            inputDataSize,      /* I    length of input                                             */
+    opus_int            correlationCount    /* I    number of correlation taps to compute                       */
+);
+
+opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const silk_float    *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int            *pitch_out,         /* O    Pitch lag values [nb_subfr]                                 */
+    opus_int16          *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8           *contourIndex,      /* O    Pitch contour Index                                         */
+    silk_float          *LTPCorr,           /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int            prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const silk_float    search_thres1,      /* I    First stage threshold for lag candidates 0 - 1              */
+    const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
+    const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
+);
+
+void silk_insertion_sort_decreasing_FLP(
+    silk_float          *a,                 /* I/O  Unsorted / Sorted vector                                    */
+    opus_int            *idx,               /* O    Index vector for the sorted elements                        */
+    const opus_int      L,                  /* I    Vector length                                               */
+    const opus_int      K                   /* I    Number of correctly sorted positions                        */
+);
+
+/* Compute reflection coefficients from input signal */
+silk_float silk_burg_modified_FLP(          /* O    returns residual energy                                     */
+    silk_float          A[],                /* O    prediction coefficients (length order)                      */
+    const silk_float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const opus_int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const opus_int      D                   /* I    order                                                       */
+);
+
+/* multiply a vector by a constant */
+void silk_scale_vector_FLP(
+    silk_float          *data1,
+    silk_float          gain,
+    opus_int            dataSize
+);
+
+/* copy and multiply a vector by a constant */
+void silk_scale_copy_vector_FLP(
+    silk_float          *data_out,
+    const silk_float    *data_in,
+    silk_float          gain,
+    opus_int            dataSize
+);
+
+/* inner product of two silk_float arrays, with result as double */
+double silk_inner_product_FLP(
+    const silk_float    *data1,
+    const silk_float    *data2,
+    opus_int            dataSize
+);
+
+/* sum of squares of a silk_float array, with result as double */
+double silk_energy_FLP(
+    const silk_float    *data,
+    opus_int            dataSize
+);
+
+/********************************************************************/
+/*                                MACROS                            */
+/********************************************************************/
+
+#define PI              (3.1415926536f)
+
+#define silk_min_float( a, b )                  (((a) < (b)) ? (a) :  (b))
+#define silk_max_float( a, b )                  (((a) > (b)) ? (a) :  (b))
+#define silk_abs_float( a )                     ((silk_float)fabs(a))
+
+/* sigmoid function */
+static OPUS_INLINE silk_float silk_sigmoid( silk_float x )
+{
+    return (silk_float)(1.0 / (1.0 + exp(-x)));
+}
+
+/* floating-point to integer conversion (rounding) */
+static OPUS_INLINE opus_int32 silk_float2int( silk_float x )
+{
+    return (opus_int32)float2int( x );
+}
+
+/* floating-point to integer conversion (rounding) */
+static OPUS_INLINE void silk_float2short_array(
+    opus_int16       *out,
+    const silk_float *in,
+    opus_int32       length
+)
+{
+    opus_int32 k;
+    for( k = length - 1; k >= 0; k-- ) {
+        out[k] = silk_SAT16( (opus_int32)float2int( in[k] ) );
+    }
+}
+
+/* integer to floating-point conversion */
+static OPUS_INLINE void silk_short2float_array(
+    silk_float       *out,
+    const opus_int16 *in,
+    opus_int32       length
+)
+{
+    opus_int32 k;
+    for( k = length - 1; k >= 0; k-- ) {
+        out[k] = (silk_float)in[k];
+    }
+}
+
+/* using log2() helps the fixed-point conversion */
+static OPUS_INLINE silk_float silk_log2( double x )
+{
+    return ( silk_float )( 3.32192809488736 * log10( x ) );
+}
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_SIGPROC_FLP_H */
diff --git a/drivers/opus/silk/float/apply_sine_window_FLP.c b/drivers/opus/silk/float/apply_sine_window_FLP.c
new file mode 100644
index 0000000000..d904585d17
--- /dev/null
+++ b/drivers/opus/silk/float/apply_sine_window_FLP.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Apply sine window to signal vector   */
+/* Window types:                        */
+/*  1 -> sine window from 0 to pi/2     */
+/*  2 -> sine window from pi/2 to pi    */
+void silk_apply_sine_window_FLP(
+    silk_float                      px_win[],                           /* O    Pointer to windowed signal                  */
+    const silk_float                px[],                               /* I    Pointer to input signal                     */
+    const opus_int                  win_type,                           /* I    Selects a window type                       */
+    const opus_int                  length                              /* I    Window length, multiple of 4                */
+)
+{
+    opus_int   k;
+    silk_float freq, c, S0, S1;
+
+    silk_assert( win_type == 1 || win_type == 2 );
+
+    /* Length must be multiple of 4 */
+    silk_assert( ( length & 3 ) == 0 );
+
+    freq = PI / ( length + 1 );
+
+    /* Approximation of 2 * cos(f) */
+    c = 2.0f - freq * freq;
+
+    /* Initialize state */
+    if( win_type < 2 ) {
+        /* Start from 0 */
+        S0 = 0.0f;
+        /* Approximation of sin(f) */
+        S1 = freq;
+    } else {
+        /* Start from 1 */
+        S0 = 1.0f;
+        /* Approximation of cos(f) */
+        S1 = 0.5f * c;
+    }
+
+    /* Uses the recursive equation:   sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f)   */
+    /* 4 samples at a time */
+    for( k = 0; k < length; k += 4 ) {
+        px_win[ k + 0 ] = px[ k + 0 ] * 0.5f * ( S0 + S1 );
+        px_win[ k + 1 ] = px[ k + 1 ] * S1;
+        S0 = c * S1 - S0;
+        px_win[ k + 2 ] = px[ k + 2 ] * 0.5f * ( S1 + S0 );
+        px_win[ k + 3 ] = px[ k + 3 ] * S0;
+        S1 = c * S0 - S1;
+    }
+}
diff --git a/drivers/opus/silk/float/autocorrelation_FLP.c b/drivers/opus/silk/float/autocorrelation_FLP.c
new file mode 100644
index 0000000000..192a001b16
--- /dev/null
+++ b/drivers/opus/silk/float/autocorrelation_FLP.c
@@ -0,0 +1,52 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "typedef.h"
+#include "SigProc_FLP.h"
+
+/* compute autocorrelation */
+void silk_autocorrelation_FLP(
+    silk_float          *results,           /* O    result (length correlationCount)                            */
+    const silk_float    *inputData,         /* I    input data to correlate                                     */
+    opus_int            inputDataSize,      /* I    length of input                                             */
+    opus_int            correlationCount    /* I    number of correlation taps to compute                       */
+)
+{
+    opus_int i;
+
+    if( correlationCount > inputDataSize ) {
+        correlationCount = inputDataSize;
+    }
+
+    for( i = 0; i < correlationCount; i++ ) {
+        results[ i ] =  (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i );
+    }
+}
diff --git a/drivers/opus/silk/float/burg_modified_FLP.c b/drivers/opus/silk/float/burg_modified_FLP.c
new file mode 100644
index 0000000000..0f30ca2280
--- /dev/null
+++ b/drivers/opus/silk/float/burg_modified_FLP.c
@@ -0,0 +1,186 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+#include "tuning_parameters.h"
+#include "define.h"
+
+#define MAX_FRAME_SIZE              384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
+
+/* Compute reflection coefficients from input signal */
+silk_float silk_burg_modified_FLP(          /* O    returns residual energy                                     */
+    silk_float          A[],                /* O    prediction coefficients (length order)                      */
+    const silk_float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
+    const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */
+    const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
+    const opus_int      nb_subfr,           /* I    number of subframes stacked in x                            */
+    const opus_int      D                   /* I    order                                                       */
+)
+{
+    opus_int         k, n, s, reached_max_gain;
+    double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
+    const silk_float *x_ptr;
+    double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
+    double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
+    double           Af[ SILK_MAX_ORDER_LPC ];
+
+    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+
+    /* Compute autocorrelations, added over subframes */
+    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
+    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
+    for( s = 0; s < nb_subfr; s++ ) {
+        x_ptr = x + s * subfr_length;
+        for( n = 1; n < D + 1; n++ ) {
+            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+        }
+    }
+    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
+
+    /* Initialize */
+    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
+    invGain = 1.0f;
+    reached_max_gain = 0;
+    for( n = 0; n < D; n++ ) {
+        /* Update first row of correlation matrix (without first element) */
+        /* Update last row of correlation matrix (without last element, stored in reversed order) */
+        /* Update C * Af */
+        /* Update C * flipud(Af) (stored in reversed order) */
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            tmp1 = x_ptr[ n ];
+            tmp2 = x_ptr[ subfr_length - n - 1 ];
+            for( k = 0; k < n; k++ ) {
+                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
+                C_last_row[ k ]  -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
+                Atmp = Af[ k ];
+                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
+                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
+            }
+            for( k = 0; k <= n; k++ ) {
+                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
+                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
+            }
+        }
+        tmp1 = C_first_row[ n ];
+        tmp2 = C_last_row[ n ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            tmp1 += C_last_row[  n - k - 1 ] * Atmp;
+            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
+        }
+        CAf[ n + 1 ] = tmp1;
+        CAb[ n + 1 ] = tmp2;
+
+        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
+        num = CAb[ n + 1 ];
+        nrg_b = CAb[ 0 ];
+        nrg_f = CAf[ 0 ];
+        for( k = 0; k < n; k++ ) {
+            Atmp = Af[ k ];
+            num   += CAb[ n - k ] * Atmp;
+            nrg_b += CAb[ k + 1 ] * Atmp;
+            nrg_f += CAf[ k + 1 ] * Atmp;
+        }
+        silk_assert( nrg_f > 0.0 );
+        silk_assert( nrg_b > 0.0 );
+
+        /* Calculate the next order reflection (parcor) coefficient */
+        rc = -2.0 * num / ( nrg_f + nrg_b );
+        silk_assert( rc > -1.0 && rc < 1.0 );
+
+        /* Update inverse prediction gain */
+        tmp1 = invGain * ( 1.0 - rc * rc );
+        if( tmp1 <= minInvGain ) {
+            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
+            rc = sqrt( 1.0 - minInvGain / invGain );
+            if( num > 0 ) {
+                /* Ensure adjusted reflection coefficients has the original sign */
+                rc = -rc;
+            }
+            invGain = minInvGain;
+            reached_max_gain = 1;
+        } else {
+            invGain = tmp1;
+        }
+
+        /* Update the AR coefficients */
+        for( k = 0; k < (n + 1) >> 1; k++ ) {
+            tmp1 = Af[ k ];
+            tmp2 = Af[ n - k - 1 ];
+            Af[ k ]         = tmp1 + rc * tmp2;
+            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
+        }
+        Af[ n ] = rc;
+
+        if( reached_max_gain ) {
+            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
+            for( k = n + 1; k < D; k++ ) {
+                Af[ k ] = 0.0;
+            }
+            break;
+        }
+
+        /* Update C * Af and C * Ab */
+        for( k = 0; k <= n + 1; k++ ) {
+            tmp1 = CAf[ k ];
+            CAf[ k ]          += rc * CAb[ n - k + 1 ];
+            CAb[ n - k + 1  ] += rc * tmp1;
+        }
+    }
+
+    if( reached_max_gain ) {
+        /* Convert to silk_float */
+        for( k = 0; k < D; k++ ) {
+            A[ k ] = (silk_float)( -Af[ k ] );
+        }
+        /* Subtract energy of preceding samples from C0 */
+        for( s = 0; s < nb_subfr; s++ ) {
+            C0 -= silk_energy_FLP( x + s * subfr_length, D );
+        }
+        /* Approximate residual energy */
+        nrg_f = C0 * invGain;
+    } else {
+        /* Compute residual energy and store coefficients as silk_float */
+        nrg_f = CAf[ 0 ];
+        tmp1 = 1.0;
+        for( k = 0; k < D; k++ ) {
+            Atmp = Af[ k ];
+            nrg_f += CAf[ k + 1 ] * Atmp;
+            tmp1  += Atmp * Atmp;
+            A[ k ] = (silk_float)(-Atmp);
+        }
+        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
+    }
+
+    /* Return residual energy */
+    return (silk_float)nrg_f;
+}
diff --git a/drivers/opus/silk/float/bwexpander_FLP.c b/drivers/opus/silk/float/bwexpander_FLP.c
new file mode 100644
index 0000000000..86154dc3f1
--- /dev/null
+++ b/drivers/opus/silk/float/bwexpander_FLP.c
@@ -0,0 +1,49 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* Chirp (bw expand) LP AR filter */
+void silk_bwexpander_FLP(
+    silk_float          *ar,                /* I/O  AR filter to be expanded (without leading 1)                */
+    const opus_int      d,                  /* I    length of ar                                                */
+    const silk_float    chirp               /* I    chirp factor (typically in range (0..1) )                   */
+)
+{
+    opus_int   i;
+    silk_float cfac = chirp;
+
+    for( i = 0; i < d - 1; i++ ) {
+        ar[ i ] *=  cfac;
+        cfac    *=  chirp;
+    }
+    ar[ d - 1 ] *=  cfac;
+}
diff --git a/drivers/opus/silk/float/corrMatrix_FLP.c b/drivers/opus/silk/float/corrMatrix_FLP.c
new file mode 100644
index 0000000000..e193c98f11
--- /dev/null
+++ b/drivers/opus/silk/float/corrMatrix_FLP.c
@@ -0,0 +1,93 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/**********************************************************************
+ * Correlation matrix computations for LS estimate.
+ **********************************************************************/
+
+#include "main_FLP.h"
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FLP(
+    const silk_float                *x,                                 /* I    x vector [L+order-1] used to create X       */
+    const silk_float                *t,                                 /* I    Target vector [L]                           */
+    const opus_int                  L,                                  /* I    Length of vecors                            */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */
+)
+{
+    opus_int lag;
+    const silk_float *ptr1;
+
+    ptr1 = &x[ Order - 1 ];                     /* Points to first sample of column 0 of X: X[:,0] */
+    for( lag = 0; lag < Order; lag++ ) {
+        /* Calculate X[:,lag]'*t */
+        Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L );
+        ptr1--;                                 /* Next column of X */
+    }
+}
+
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FLP(
+    const silk_float                *x,                                 /* I    x vector [ L+order-1 ] used to create X     */
+    const opus_int                  L,                                  /* I    Length of vectors                           */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *XX                                 /* O    X'*X correlation matrix [order x order]     */
+)
+{
+    opus_int j, lag;
+    double  energy;
+    const silk_float *ptr1, *ptr2;
+
+    ptr1 = &x[ Order - 1 ];                     /* First sample of column 0 of X */
+    energy = silk_energy_FLP( ptr1, L );  /* X[:,0]'*X[:,0] */
+    matrix_ptr( XX, 0, 0, Order ) = ( silk_float )energy;
+    for( j = 1; j < Order; j++ ) {
+        /* Calculate X[:,j]'*X[:,j] */
+        energy += ptr1[ -j ] * ptr1[ -j ] - ptr1[ L - j ] * ptr1[ L - j ];
+        matrix_ptr( XX, j, j, Order ) = ( silk_float )energy;
+    }
+
+    ptr2 = &x[ Order - 2 ];                     /* First sample of column 1 of X */
+    for( lag = 1; lag < Order; lag++ ) {
+        /* Calculate X[:,0]'*X[:,lag] */
+        energy = silk_inner_product_FLP( ptr1, ptr2, L );
+        matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy;
+        matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy;
+        /* Calculate X[:,j]'*X[:,j + lag] */
+        for( j = 1; j < ( Order - lag ); j++ ) {
+            energy += ptr1[ -j ] * ptr2[ -j ] - ptr1[ L - j ] * ptr2[ L - j ];
+            matrix_ptr( XX, lag + j, j, Order ) = ( silk_float )energy;
+            matrix_ptr( XX, j, lag + j, Order ) = ( silk_float )energy;
+        }
+        ptr2--;                                 /* Next column of X */
+    }
+}
diff --git a/drivers/opus/silk/float/encode_frame_FLP.c b/drivers/opus/silk/float/encode_frame_FLP.c
new file mode 100644
index 0000000000..90e5357ced
--- /dev/null
+++ b/drivers/opus/silk/float/encode_frame_FLP.c
@@ -0,0 +1,372 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
+static OPUS_INLINE void silk_LBRR_encode_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                xfw[],                              /* I    Input signal                                */
+    opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
+);
+
+void silk_encode_do_VAD_FLP(
+    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+)
+{
+    /****************************/
+    /* Voice Activity Detection */
+    /****************************/
+    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
+
+    /**************************************************/
+    /* Convert speech activity into VAD and DTX flags */
+    /**************************************************/
+    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+        psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
+        psEnc->sCmn.noSpeechCounter++;
+        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.inDTX = 0;
+        } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
+            psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
+            psEnc->sCmn.inDTX           = 0;
+        }
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
+    } else {
+        psEnc->sCmn.noSpeechCounter    = 0;
+        psEnc->sCmn.inDTX              = 0;
+        psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+    }
+}
+
+/****************/
+/* Encode frame */
+/****************/
+opus_int silk_encode_frame_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes;                    */
+    ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
+    opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
+    opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
+    opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
+)
+{
+    silk_encoder_control_FLP sEncCtrl;
+    opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
+    silk_float   *x_frame, *res_pitch_frame;
+    silk_float   xfw[ MAX_FRAME_LENGTH ];
+    silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
+    ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
+    silk_nsq_state sNSQ_copy, sNSQ_copy2;
+    opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
+    opus_int32   gainsID, gainsID_lower, gainsID_upper;
+    opus_int16   gainMult_Q8;
+    opus_int16   ec_prevLagIndex_copy;
+    opus_int     ec_prevSignalType_copy;
+    opus_int8    LastGainIndex_copy2;
+    opus_int32   pGains_Q16[ MAX_NB_SUBFR ];
+    opus_uint8   ec_buf_copy[ 1275 ];
+
+    /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
+    LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
+
+    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
+
+    /**************************************************************/
+    /* Set up Input Pointers, and insert frame in input buffer    */
+    /**************************************************************/
+    /* pointers aligned with start of frame to encode */
+    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
+    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
+
+    /***************************************/
+    /* Ensure smooth bandwidth transitions */
+    /***************************************/
+    silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+    /*******************************************/
+    /* Copy new frame to front of input buffer */
+    /*******************************************/
+    silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
+
+    /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
+    for( i = 0; i < 8; i++ ) {
+        x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
+    }
+
+    if( !psEnc->sCmn.prefillFlag ) {
+        /*****************************************/
+        /* Find pitch lags, initial LPC analysis */
+        /*****************************************/
+        silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
+
+        /************************/
+        /* Noise shape analysis */
+        /************************/
+        silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
+
+        /***************************************************/
+        /* Find linear prediction coefficients (LPC + LTP) */
+        /***************************************************/
+        silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
+
+        /****************************************/
+        /* Process gains                        */
+        /****************************************/
+        silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );
+
+        /*****************************************/
+        /* Prefiltering for noise shaper         */
+        /*****************************************/
+        silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame );
+
+        /****************************************/
+        /* Low Bitrate Redundant Encoding       */
+        /****************************************/
+        silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding );
+
+        /* Loop over quantizer and entroy coding to control bitrate */
+        maxIter = 6;
+        gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
+        found_lower = 0;
+        found_upper = 0;
+        gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+        gainsID_lower = -1;
+        gainsID_upper = -1;
+        /* Copy part of the input state */
+        silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
+        silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        seed_copy = psEnc->sCmn.indices.Seed;
+        ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
+        ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
+        for( iter = 0; ; iter++ ) {
+            if( gainsID == gainsID_lower ) {
+                nBits = nBits_lower;
+            } else if( gainsID == gainsID_upper ) {
+                nBits = nBits_upper;
+            } else {
+                /* Restore part of the input state */
+                if( iter > 0 ) {
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
+                    psEnc->sCmn.indices.Seed = seed_copy;
+                    psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
+                    psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
+                }
+
+                /*****************************************/
+                /* Noise shaping quantization            */
+                /*****************************************/
+                silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw );
+
+                /****************************************/
+                /* Encode Parameters                    */
+                /****************************************/
+                silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
+
+                /****************************************/
+                /* Encode Excitation Signal             */
+                /****************************************/
+                silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
+                      psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
+
+                nBits = ec_tell( psRangeEnc );
+
+                if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
+                    break;
+                }
+            }
+
+            if( iter == maxIter ) {
+                if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
+                    /* Restore output state from earlier iteration that did meet the bitrate budget */
+                    silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
+                    silk_assert( sRangeEnc_copy2.offs <= 1275 );
+                    silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
+                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
+                    psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
+                }
+                break;
+            }
+
+            if( nBits > maxBits ) {
+                if( found_lower == 0 && iter >= 2 ) {
+                    /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
+                    sEncCtrl.Lambda *= 1.5f;
+                    found_upper = 0;
+                    gainsID_upper = -1;
+                } else {
+                    found_upper = 1;
+                    nBits_upper = nBits;
+                    gainMult_upper = gainMult_Q8;
+                    gainsID_upper = gainsID;
+                }
+            } else if( nBits < maxBits - 5 ) {
+                found_lower = 1;
+                nBits_lower = nBits;
+                gainMult_lower = gainMult_Q8;
+                if( gainsID != gainsID_lower ) {
+                    gainsID_lower = gainsID;
+                    /* Copy part of the output state */
+                    silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
+                    silk_assert( psRangeEnc->offs <= 1275 );
+                    silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
+                    silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+                    LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
+                }
+            } else {
+                /* Within 5 bits of budget: close enough */
+                break;
+            }
+
+            if( ( found_lower & found_upper ) == 0 ) {
+                /* Adjust gain according to high-rate rate/distortion curve */
+                opus_int32 gain_factor_Q16;
+                gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
+                gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
+                if( nBits > maxBits ) {
+                    gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
+                }
+                gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+            } else {
+                /* Adjust gain by interpolating */
+                gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower );
+                /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
+                if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
+                } else
+                if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
+                    gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
+                }
+            }
+
+            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+                pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
+            }
+
+            /* Quantize gains */
+            psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
+            silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
+                  &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+            /* Unique identifier of gains vector */
+            gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
+
+            /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
+            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+                sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f;
+            }
+        }
+    }
+
+    /* Update input buffer */
+    silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
+        ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
+
+    /* Exit without entropy coding */
+    if( psEnc->sCmn.prefillFlag ) {
+        /* No payload */
+        *pnBytesOut = 0;
+        return ret;
+    }
+
+    /* Parameters needed for next frame */
+    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
+
+    /****************************************/
+    /* Finalize payload                     */
+    /****************************************/
+    psEnc->sCmn.first_frame_after_reset = 0;
+    /* Payload size */
+    *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
+
+    return ret;
+}
+
+/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
+static OPUS_INLINE void silk_LBRR_encode_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                xfw[],                              /* I    Input signal                                */
+    opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
+)
+{
+    opus_int     k;
+    opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
+    silk_float   TempGains[ MAX_NB_SUBFR ];
+    SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
+    silk_nsq_state sNSQ_LBRR;
+
+    /*******************************************/
+    /* Control use of inband LBRR              */
+    /*******************************************/
+    if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
+        psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
+
+        /* Copy noise shaping quantizer state and quantization indices from regular encoding */
+        silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
+        silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
+
+        /* Save original gains */
+        silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
+
+        if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
+            /* First frame in packet or previous frame not LBRR coded */
+            psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
+
+            /* Increase Gains to get target LBRR rate */
+            psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
+            psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
+        }
+
+        /* Decode to get gains in sync with decoder */
+        silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices,
+            &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+        /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
+        for( k = 0; k <  psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f );
+        }
+
+        /*****************************************/
+        /* Noise shaping quantization            */
+        /*****************************************/
+        silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
+            psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
+
+        /* Restore original gains */
+        silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
+    }
+}
diff --git a/drivers/opus/silk/float/energy_FLP.c b/drivers/opus/silk/float/energy_FLP.c
new file mode 100644
index 0000000000..d441526df3
--- /dev/null
+++ b/drivers/opus/silk/float/energy_FLP.c
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* sum of squares of a silk_float array, with result as double */
+double silk_energy_FLP(
+    const silk_float    *data,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+    double   result;
+
+    /* 4x unrolled loop */
+    result = 0.0;
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        result += data[ i + 0 ] * (double)data[ i + 0 ] +
+                  data[ i + 1 ] * (double)data[ i + 1 ] +
+                  data[ i + 2 ] * (double)data[ i + 2 ] +
+                  data[ i + 3 ] * (double)data[ i + 3 ];
+    }
+
+    /* add any remaining products */
+    for( ; i < dataSize; i++ ) {
+        result += data[ i ] * (double)data[ i ];
+    }
+
+    silk_assert( result >= 0.0 );
+    return result;
+}
diff --git a/drivers/opus/silk/float/find_LPC_FLP.c b/drivers/opus/silk/float/find_LPC_FLP.c
new file mode 100644
index 0000000000..212f2de3cd
--- /dev/null
+++ b/drivers/opus/silk/float/find_LPC_FLP.c
@@ -0,0 +1,104 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "define.h"
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* LPC analysis */
+void silk_find_LPC_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */
+    const silk_float                x[],                                /* I    Input signal                                */
+    const silk_float                minInvGain                          /* I    Inverse of max prediction gain              */
+)
+{
+    opus_int    k, subfr_length;
+    silk_float  a[ MAX_LPC_ORDER ];
+
+    /* Used only for NLSF interpolation */
+    silk_float  res_nrg, res_nrg_2nd, res_nrg_interp;
+    opus_int16  NLSF0_Q15[ MAX_LPC_ORDER ];
+    silk_float  a_tmp[ MAX_LPC_ORDER ];
+    silk_float  LPC_res[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ];
+
+    subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder;
+
+    /* Default: No interpolation */
+    psEncC->indices.NLSFInterpCoef_Q2 = 4;
+
+    /* Burg AR analysis for the full frame */
+    res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+
+    if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
+        /* Optimal solution for last 10 ms; subtract residual energy here, as that's easier than        */
+        /* adding it to the residual energy of the first 10 ms in each iteration of the search below    */
+        res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder );
+
+        /* Convert to NLSFs */
+        silk_A2NLSF_FLP( NLSF_Q15, a_tmp, psEncC->predictLPCOrder );
+
+        /* Search over interpolation indices to find the one with lowest residual energy */
+        res_nrg_2nd = silk_float_MAX;
+        for( k = 3; k >= 0; k-- ) {
+            /* Interpolate NLSFs for first half */
+            silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder );
+
+            /* Convert to LPC for residual energy evaluation */
+            silk_NLSF2A_FLP( a_tmp, NLSF0_Q15, psEncC->predictLPCOrder );
+
+            /* Calculate residual energy with LSF interpolation */
+            silk_LPC_analysis_filter_FLP( LPC_res, a_tmp, x, 2 * subfr_length, psEncC->predictLPCOrder );
+            res_nrg_interp = (silk_float)(
+                silk_energy_FLP( LPC_res + psEncC->predictLPCOrder,                subfr_length - psEncC->predictLPCOrder ) +
+                silk_energy_FLP( LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ) );
+
+            /* Determine whether current interpolated NLSFs are best so far */
+            if( res_nrg_interp < res_nrg ) {
+                /* Interpolation has lower residual energy */
+                res_nrg = res_nrg_interp;
+                psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k;
+            } else if( res_nrg_interp > res_nrg_2nd ) {
+                /* No reason to continue iterating - residual energies will continue to climb */
+                break;
+            }
+            res_nrg_2nd = res_nrg_interp;
+        }
+    }
+
+    if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) {
+        /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */
+        silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder );
+    }
+
+    silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || 
+        ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) );
+}
diff --git a/drivers/opus/silk/float/find_LTP_FLP.c b/drivers/opus/silk/float/find_LTP_FLP.c
new file mode 100644
index 0000000000..5c62851f20
--- /dev/null
+++ b/drivers/opus/silk/float/find_LTP_FLP.c
@@ -0,0 +1,132 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+void silk_find_LTP_FLP(
+    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */
+    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */
+    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */
+    const silk_float                r_lpc[],                            /* I    LPC residual                                */
+    const opus_int                  lag[  MAX_NB_SUBFR ],               /* I    LTP lags                                    */
+    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */
+)
+{
+    opus_int   i, k;
+    silk_float *b_ptr, temp, *WLTP_ptr;
+    silk_float LPC_res_nrg, LPC_LTP_res_nrg;
+    silk_float d[ MAX_NB_SUBFR ], m, g, delta_b[ LTP_ORDER ];
+    silk_float w[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], regu;
+    silk_float Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];
+    const silk_float *r_ptr, *lag_ptr;
+
+    b_ptr    = b;
+    WLTP_ptr = WLTP;
+    r_ptr    = &r_lpc[ mem_offset ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );
+
+        silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, WLTP_ptr );
+        silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr );
+
+        rr[ k ] = ( silk_float )silk_energy_FLP( r_ptr, subfr_length );
+        regu = 1.0f + rr[ k ] +
+            matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ) +
+            matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER );
+        regu *= LTP_DAMPING / 3;
+        silk_regularize_correlations_FLP( WLTP_ptr, &rr[ k ], regu, LTP_ORDER );
+        silk_solve_LDL_FLP( WLTP_ptr, LTP_ORDER, Rr, b_ptr );
+
+        /* Calculate residual energy */
+        nrg[ k ] = silk_residual_energy_covar_FLP( b_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER );
+
+        temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length );
+        silk_scale_vector_FLP( WLTP_ptr, temp, LTP_ORDER * LTP_ORDER );
+        w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER );
+
+        r_ptr    += subfr_length;
+        b_ptr    += LTP_ORDER;
+        WLTP_ptr += LTP_ORDER * LTP_ORDER;
+    }
+
+    /* Compute LTP coding gain */
+    if( LTPredCodGain != NULL ) {
+        LPC_LTP_res_nrg = 1e-6f;
+        LPC_res_nrg     = 0.0f;
+        for( k = 0; k < nb_subfr; k++ ) {
+            LPC_res_nrg     += rr[  k ] * Wght[ k ];
+            LPC_LTP_res_nrg += nrg[ k ] * Wght[ k ];
+        }
+
+        silk_assert( LPC_LTP_res_nrg > 0 );
+        *LTPredCodGain = 3.0f * silk_log2( LPC_res_nrg / LPC_LTP_res_nrg );
+    }
+
+    /* Smoothing */
+    /* d = sum( B, 1 ); */
+    b_ptr = b;
+    for( k = 0; k < nb_subfr; k++ ) {
+        d[ k ] = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            d[ k ] += b_ptr[ i ];
+        }
+        b_ptr += LTP_ORDER;
+    }
+    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */
+    temp = 1e-3f;
+    for( k = 0; k < nb_subfr; k++ ) {
+        temp += w[ k ];
+    }
+    m = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        m += d[ k ] * w[ k ];
+    }
+    m = m / temp;
+
+    b_ptr = b;
+    for( k = 0; k < nb_subfr; k++ ) {
+        g = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] );
+        temp = 0;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            delta_b[ i ] = silk_max_float( b_ptr[ i ], 0.1f );
+            temp += delta_b[ i ];
+        }
+        temp = g / temp;
+        for( i = 0; i < LTP_ORDER; i++ ) {
+            b_ptr[ i ] = b_ptr[ i ] + delta_b[ i ] * temp;
+        }
+        b_ptr += LTP_ORDER;
+    }
+}
diff --git a/drivers/opus/silk/float/find_pitch_lags_FLP.c b/drivers/opus/silk/float/find_pitch_lags_FLP.c
new file mode 100644
index 0000000000..d74d5941b5
--- /dev/null
+++ b/drivers/opus/silk/float/find_pitch_lags_FLP.c
@@ -0,0 +1,132 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include <stdlib.h>
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+void silk_find_pitch_lags_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    silk_float                      res[],                              /* O    Residual                                    */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
+)
+{
+    opus_int   buf_len;
+    silk_float thrhld, res_nrg;
+    const silk_float *x_buf_ptr, *x_buf;
+    silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];
+    silk_float A[         MAX_FIND_PITCH_LPC_ORDER ];
+    silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ];
+    silk_float Wsig[      FIND_PITCH_LPC_WIN_MAX ];
+    silk_float *Wsig_ptr;
+
+    /******************************************/
+    /* Set up buffer lengths etc based on Fs  */
+    /******************************************/
+    buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length;
+
+    /* Safety check */
+    silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length );
+
+    x_buf = x - psEnc->sCmn.ltp_mem_length;
+
+    /******************************************/
+    /* Estimate LPC AR coeficients            */
+    /******************************************/
+
+    /* Calculate windowed signal */
+
+    /* First LA_LTP samples */
+    x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length;
+    Wsig_ptr  = Wsig;
+    silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch );
+
+    /* Middle non-windowed samples */
+    Wsig_ptr  += psEnc->sCmn.la_pitch;
+    x_buf_ptr += psEnc->sCmn.la_pitch;
+    silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ) ) * sizeof( silk_float ) );
+
+    /* Last LA_LTP samples */
+    Wsig_ptr  += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 );
+    x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 );
+    silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
+
+    /* Calculate autocorrelation sequence */
+    silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );
+
+    /* Add white noise, as a fraction of the energy */
+    auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1;
+
+    /* Calculate the reflection coefficients using Schur */
+    res_nrg = silk_schur_FLP( refl_coef, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Prediction gain */
+    psEncCtrl->predGain = auto_corr[ 0 ] / silk_max_float( res_nrg, 1.0f );
+
+    /* Convert reflection coefficients to prediction coefficients */
+    silk_k2a_FLP( A, refl_coef, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    /* Bandwidth expansion */
+    silk_bwexpander_FLP( A, psEnc->sCmn.pitchEstimationLPCOrder, FIND_PITCH_BANDWIDTH_EXPANSION );
+
+    /*****************************************/
+    /* LPC analysis filtering                */
+    /*****************************************/
+    silk_LPC_analysis_filter_FLP( res, A, x_buf, buf_len, psEnc->sCmn.pitchEstimationLPCOrder );
+
+    if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {
+        /* Threshold for pitch estimator */
+        thrhld  = 0.6f;
+        thrhld -= 0.004f * psEnc->sCmn.pitchEstimationLPCOrder;
+        thrhld -= 0.1f   * psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+        thrhld -= 0.15f  * (psEnc->sCmn.prevSignalType >> 1);
+        thrhld -= 0.1f   * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f );
+
+        /*****************************************/
+        /* Call Pitch estimator                  */
+        /*****************************************/
+        if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex,
+            &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f,
+            thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 )
+        {
+            psEnc->sCmn.indices.signalType = TYPE_VOICED;
+        } else {
+            psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
+        }
+    } else {
+        silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) );
+        psEnc->sCmn.indices.lagIndex = 0;
+        psEnc->sCmn.indices.contourIndex = 0;
+        psEnc->LTPCorr = 0;
+    }
+}
diff --git a/drivers/opus/silk/float/find_pred_coefs_FLP.c b/drivers/opus/silk/float/find_pred_coefs_FLP.c
new file mode 100644
index 0000000000..e0d8804cc9
--- /dev/null
+++ b/drivers/opus/silk/float/find_pred_coefs_FLP.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                res_pitch[],                        /* I    Residual from pitch analysis                */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+)
+{
+    opus_int         i;
+    silk_float       WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];
+    silk_float       invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ];
+    opus_int16       NLSF_Q15[ MAX_LPC_ORDER ];
+    const silk_float *x_ptr;
+    silk_float       *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ];
+    silk_float       minInvGain;
+
+    /* Weighting for weighted least squares */
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        silk_assert( psEncCtrl->Gains[ i ] > 0.0f );
+        invGains[ i ] = 1.0f / psEncCtrl->Gains[ i ];
+        Wght[ i ]     = invGains[ i ] * invGains[ i ];
+    }
+
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /**********/
+        /* VOICED */
+        /**********/
+        silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
+
+        /* LTP analysis */
+        silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch,
+            psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length );
+
+        /* Quantize LTP gain parameters */
+        silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
+            &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr );
+
+        /* Control LTP scaling */
+        silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding );
+
+        /* Create LTP residual */
+        silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef,
+            psEncCtrl->pitchL, invGains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+    } else {
+        /************/
+        /* UNVOICED */
+        /************/
+        /* Create signal with prepended subframes, scaled by inverse gains */
+        x_ptr     = x - psEnc->sCmn.predictLPCOrder;
+        x_pre_ptr = LPC_in_pre;
+        for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+            silk_scale_copy_vector_FLP( x_pre_ptr, x_ptr, invGains[ i ],
+                psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
+            x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
+            x_ptr     += psEnc->sCmn.subfr_length;
+        }
+        silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );
+        psEncCtrl->LTPredCodGain = 0.0f;
+		psEnc->sCmn.sum_log_gain_Q7 = 0;
+    }
+
+    /* Limit on total predictive coding gain */
+    if( psEnc->sCmn.first_frame_after_reset ) {
+        minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET;
+    } else {        
+        minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) /  MAX_PREDICTION_POWER_GAIN;
+        minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality;
+    }
+
+    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
+    silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain );
+
+    /* Quantize LSFs */
+    silk_process_NLSFs_FLP( &psEnc->sCmn, psEncCtrl->PredCoef, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
+
+    /* Calculate residual energy using quantized LPC coefficients */
+    silk_residual_energy_FLP( psEncCtrl->ResNrg, LPC_in_pre, psEncCtrl->PredCoef, psEncCtrl->Gains,
+        psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
+
+    /* Copy to prediction struct for use in next frame for interpolation */
+    silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+}
+
diff --git a/drivers/opus/silk/float/inner_product_FLP.c b/drivers/opus/silk/float/inner_product_FLP.c
new file mode 100644
index 0000000000..57acf5ffba
--- /dev/null
+++ b/drivers/opus/silk/float/inner_product_FLP.c
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* inner product of two silk_float arrays, with result as double */
+double silk_inner_product_FLP(
+    const silk_float    *data1,
+    const silk_float    *data2,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+    double   result;
+
+    /* 4x unrolled loop */
+    result = 0.0;
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        result += data1[ i + 0 ] * (double)data2[ i + 0 ] +
+                  data1[ i + 1 ] * (double)data2[ i + 1 ] +
+                  data1[ i + 2 ] * (double)data2[ i + 2 ] +
+                  data1[ i + 3 ] * (double)data2[ i + 3 ];
+    }
+
+    /* add any remaining products */
+    for( ; i < dataSize; i++ ) {
+        result += data1[ i ] * (double)data2[ i ];
+    }
+
+    return result;
+}
diff --git a/drivers/opus/silk/float/k2a_FLP.c b/drivers/opus/silk/float/k2a_FLP.c
new file mode 100644
index 0000000000..a668a32127
--- /dev/null
+++ b/drivers/opus/silk/float/k2a_FLP.c
@@ -0,0 +1,53 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* step up function, converts reflection coefficients to prediction coefficients */
+void silk_k2a_FLP(
+    silk_float          *A,                 /* O     prediction coefficients [order]                            */
+    const silk_float    *rc,                /* I     reflection coefficients [order]                            */
+    opus_int32          order               /* I     prediction order                                           */
+)
+{
+    opus_int   k, n;
+    silk_float Atmp[ SILK_MAX_ORDER_LPC ];
+
+    for( k = 0; k < order; k++ ) {
+        for( n = 0; n < k; n++ ) {
+            Atmp[ n ] = A[ n ];
+        }
+        for( n = 0; n < k; n++ ) {
+            A[ n ] += Atmp[ k - n - 1 ] * rc[ k ];
+        }
+        A[ k ] = -rc[ k ];
+    }
+}
diff --git a/drivers/opus/silk/float/levinsondurbin_FLP.c b/drivers/opus/silk/float/levinsondurbin_FLP.c
new file mode 100644
index 0000000000..64aaf0fb29
--- /dev/null
+++ b/drivers/opus/silk/float/levinsondurbin_FLP.c
@@ -0,0 +1,81 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* Solve the normal equations using the Levinson-Durbin recursion */
+silk_float silk_levinsondurbin_FLP(         /* O    prediction error energy                                     */
+    silk_float          A[],                /* O    prediction coefficients    [order]                          */
+    const silk_float    corr[],             /* I    input auto-correlations [order + 1]                         */
+    const opus_int      order               /* I    prediction order                                            */
+)
+{
+    opus_int   i, mHalf, m;
+    silk_float min_nrg, nrg, t, km, Atmp1, Atmp2;
+
+    min_nrg = 1e-12f * corr[ 0 ] + 1e-9f;
+    nrg = corr[ 0 ];
+    nrg = silk_max_float(min_nrg, nrg);
+    A[ 0 ] = corr[ 1 ] / nrg;
+    nrg -= A[ 0 ] * corr[ 1 ];
+    nrg = silk_max_float(min_nrg, nrg);
+
+    for( m = 1; m < order; m++ )
+    {
+        t = corr[ m + 1 ];
+        for( i = 0; i < m; i++ ) {
+            t -= A[ i ] * corr[ m - i ];
+        }
+
+        /* reflection coefficient */
+        km = t / nrg;
+
+        /* residual energy */
+        nrg -= km * t;
+        nrg = silk_max_float(min_nrg, nrg);
+
+        mHalf = m >> 1;
+        for( i = 0; i < mHalf; i++ ) {
+            Atmp1 = A[ i ];
+            Atmp2 = A[ m - i - 1 ];
+            A[ m - i - 1 ] -= km * Atmp1;
+            A[ i ]         -= km * Atmp2;
+        }
+        if( m & 1 ) {
+            A[ mHalf ]     -= km * A[ mHalf ];
+        }
+        A[ m ] = km;
+    }
+
+    /* return the residual energy */
+    return nrg;
+}
+
diff --git a/drivers/opus/silk/float/main_FLP.h b/drivers/opus/silk/float/main_FLP.h
new file mode 100644
index 0000000000..92d6ec3df1
--- /dev/null
+++ b/drivers/opus/silk/float/main_FLP.h
@@ -0,0 +1,312 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_FLP_H
+#define SILK_MAIN_FLP_H
+
+#include "SigProc_FLP.h"
+#include "SigProc_FIX.h"
+#include "structs_FLP.h"
+#include "silk_main.h"
+#include "define.h"
+#include "debug.h"
+#include "entenc.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define silk_encoder_state_Fxx      silk_encoder_state_FLP
+#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FLP
+#define silk_encode_frame_Fxx       silk_encode_frame_FLP
+
+/*********************/
+/* Encoder Functions */
+/*********************/
+
+/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
+void silk_HP_variable_cutoff(
+    silk_encoder_state_Fxx          state_Fxx[]                         /* I/O  Encoder states                              */
+);
+
+/* Encoder main function */
+void silk_encode_do_VAD_FLP(
+    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+);
+
+/* Encoder main function */
+opus_int silk_encode_frame_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes;                    */
+    ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
+    opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
+    opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
+    opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
+);
+
+/* Initializes the Silk encoder state */
+opus_int silk_init_encoder(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    int                              arch                               /* I    Run-tim architecture                        */
+);
+
+/* Control the Silk encoder */
+opus_int silk_control_encoder(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Pointer to Silk encoder state FLP           */
+    silk_EncControlStruct           *encControl,                        /* I    Control structure                           */
+    const opus_int32                TargetRate_bps,                     /* I    Target max bitrate (bps)                    */
+    const opus_int                  allow_bw_switch,                    /* I    Flag to allow switching audio bandwidth     */
+    const opus_int                  channelNb,                          /* I    Channel number                              */
+    const opus_int                  force_fs_kHz
+);
+
+/****************/
+/* Prefiltering */
+/****************/
+void silk_prefilter_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    const silk_encoder_control_FLP  *psEncCtrl,                         /* I    Encoder control FLP                         */
+    silk_float                      xw[],                               /* O    Weighted signal                             */
+    const silk_float                x[]                                 /* I    Speech signal                               */
+);
+
+/**************************/
+/* Noise shaping analysis */
+/**************************/
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                *pitch_res,                         /* I    LPC residual from pitch analysis            */
+    const silk_float                *x                                  /* I    Input signal [frame_length + la_shape]      */
+);
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FLP(
+    silk_float                      *corr,                              /* O    Result [order + 1]                          */
+    const silk_float                *input,                             /* I    Input data to correlate                     */
+    const silk_float                warping,                            /* I    Warping coefficient                         */
+    const opus_int                  length,                             /* I    Length of input                             */
+    const opus_int                  order                               /* I    Correlation order (even)                    */
+);
+
+/* Calculation of LTP state scaling */
+void silk_LTP_scale_ctrl_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+);
+
+/**********************************************/
+/* Prediction Analysis                        */
+/**********************************************/
+/* Find pitch lags */
+void silk_find_pitch_lags_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    silk_float                      res[],                              /* O    Residual                                    */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    int                             arch                                /* I    Run-time architecture                       */
+);
+
+/* Find LPC and LTP coefficients */
+void silk_find_pred_coefs_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                res_pitch[],                        /* I    Residual from pitch analysis                */
+    const silk_float                x[],                                /* I    Speech signal                               */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+);
+
+/* LPC analysis */
+void silk_find_LPC_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */
+    const silk_float                x[],                                /* I    Input signal                                */
+    const silk_float                minInvGain                          /* I    Prediction gain from LTP (dB)               */
+);
+
+/* LTP analysis */
+void silk_find_LTP_FLP(
+    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */
+    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */
+    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */
+    const silk_float                r_lpc[],                            /* I    LPC residual                                */
+    const opus_int                  lag[  MAX_NB_SUBFR ],               /* I    LTP lags                                    */
+    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */
+);
+
+void silk_LTP_analysis_filter_FLP(
+    silk_float                      *LTP_res,                           /* O    LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
+    const silk_float                *x,                                 /* I    Input signal, with preceding samples        */
+    const silk_float                B[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    LTP coefficients for each subframe          */
+    const opus_int                  pitchL[   MAX_NB_SUBFR ],           /* I    Pitch lags                                  */
+    const silk_float                invGains[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains                  */
+    const opus_int                  subfr_length,                       /* I    Length of each subframe                     */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  pre_length                          /* I    Preceding samples for each subframe         */
+);
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FLP(
+    silk_float                      nrgs[ MAX_NB_SUBFR ],               /* O    Residual energy per subframe                */
+    const silk_float                x[],                                /* I    Input signal                                */
+    silk_float                      a[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                */
+    const silk_float                gains[],                            /* I    Quantization gains                          */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+);
+
+/* 16th order LPC analysis filter */
+void silk_LPC_analysis_filter_FLP(
+    silk_float                      r_LPC[],                            /* O    LPC residual signal                         */
+    const silk_float                PredCoef[],                         /* I    LPC coefficients                            */
+    const silk_float                s[],                                /* I    Input signal                                */
+    const opus_int                  length,                             /* I    Length of input signal                      */
+    const opus_int                  Order                               /* I    LPC order                                   */
+);
+
+/* LTP tap quantizer */
+void silk_quant_LTP_gains_FLP(
+    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
+    opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
+    opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
+    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
+    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
+    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
+    const opus_int                  nb_subfr                            /* I    number of subframes                         */
+);
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+silk_float silk_residual_energy_covar_FLP(                              /* O    Weighted residual energy                    */
+    const silk_float                *c,                                 /* I    Filter coefficients                         */
+    silk_float                      *wXX,                               /* I/O  Weighted correlation matrix, reg. out       */
+    const silk_float                *wXx,                               /* I    Weighted correlation vector                 */
+    const silk_float                wxx,                                /* I    Weighted correlation value                  */
+    const opus_int                  D                                   /* I    Dimension                                   */
+);
+
+/* Processing of gains */
+void silk_process_gains_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+);
+
+/******************/
+/* Linear Algebra */
+/******************/
+/* Calculates correlation matrix X'*X */
+void silk_corrMatrix_FLP(
+    const silk_float                *x,                                 /* I    x vector [ L+order-1 ] used to create X     */
+    const opus_int                  L,                                  /* I    Length of vectors                           */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *XX                                 /* O    X'*X correlation matrix [order x order]     */
+);
+
+/* Calculates correlation vector X'*t */
+void silk_corrVector_FLP(
+    const silk_float                *x,                                 /* I    x vector [L+order-1] used to create X       */
+    const silk_float                *t,                                 /* I    Target vector [L]                           */
+    const opus_int                  L,                                  /* I    Length of vecors                            */
+    const opus_int                  Order,                              /* I    Max lag for correlation                     */
+    silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */
+);
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FLP(
+    silk_float                      *XX,                                /* I/O  Correlation matrices                        */
+    silk_float                      *xx,                                /* I/O  Correlation values                          */
+    const silk_float                noise,                              /* I    Noise energy to add                         */
+    const opus_int                  D                                   /* I    Dimension of XX                             */
+);
+
+/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */
+void silk_solve_LDL_FLP(
+    silk_float                      *A,                                 /* I/O  Symmetric square matrix, out: reg.          */
+    const opus_int                  M,                                  /* I    Size of matrix                              */
+    const silk_float                *b,                                 /* I    Pointer to b vector                         */
+    silk_float                      *x                                  /* O    Pointer to x solution vector                */
+);
+
+/* Apply sine window to signal vector.  */
+/* Window types:                        */
+/*  1 -> sine window from 0 to pi/2     */
+/*  2 -> sine window from pi/2 to pi    */
+void silk_apply_sine_window_FLP(
+    silk_float                      px_win[],                           /* O    Pointer to windowed signal                  */
+    const silk_float                px[],                               /* I    Pointer to input signal                     */
+    const opus_int                  win_type,                           /* I    Selects a window type                       */
+    const opus_int                  length                              /* I    Window length, multiple of 4                */
+);
+
+/* Wrapper functions. Call flp / fix code */
+
+/* Convert AR filter coefficients to NLSF parameters */
+void silk_A2NLSF_FLP(
+    opus_int16                      *NLSF_Q15,                          /* O    NLSF vector      [ LPC_order ]              */
+    const silk_float                *pAR,                               /* I    LPC coefficients [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+);
+
+/* Convert NLSF parameters to AR prediction filter coefficients */
+void silk_NLSF2A_FLP(
+    silk_float                      *pAR,                               /* O    LPC coefficients [ LPC_order ]              */
+    const opus_int16                *NLSF_Q15,                          /* I    NLSF vector      [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+);
+
+/* Limit, stabilize, and quantize NLSFs */
+void silk_process_NLSFs_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    silk_float                      PredCoef[ 2 ][ MAX_LPC_ORDER ],     /* O    Prediction coefficients                     */
+    opus_int16                      NLSF_Q15[      MAX_LPC_ORDER ],     /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16                prev_NLSF_Q15[ MAX_LPC_ORDER ]      /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+);
+
+/* Floating-point Silk NSQ wrapper      */
+void silk_NSQ_wrapper_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    SideInfoIndices                 *psIndices,                         /* I/O  Quantization indices                        */
+    silk_nsq_state                  *psNSQ,                             /* I/O  Noise Shaping Quantzation state             */
+    opus_int8                       pulses[],                           /* O    Quantized pulse signal                      */
+    const silk_float                x[]                                 /* I    Prefiltered input signal                    */
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/float/noise_shape_analysis_FLP.c b/drivers/opus/silk/float/noise_shape_analysis_FLP.c
new file mode 100644
index 0000000000..f80e0b3d0e
--- /dev/null
+++ b/drivers/opus/silk/float/noise_shape_analysis_FLP.c
@@ -0,0 +1,365 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
+/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
+/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
+/* coefficient in an array of coefficients, for monic filters.                                    */
+static OPUS_INLINE silk_float warped_gain(
+    const silk_float     *coefs,
+    silk_float           lambda,
+    opus_int             order
+) {
+    opus_int   i;
+    silk_float gain;
+
+    lambda = -lambda;
+    gain = coefs[ order - 1 ];
+    for( i = order - 2; i >= 0; i-- ) {
+        gain = lambda * gain + coefs[ i ];
+    }
+    return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) );
+}
+
+/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
+/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
+static OPUS_INLINE void warped_true2monic_coefs(
+    silk_float           *coefs_syn,
+    silk_float           *coefs_ana,
+    silk_float           lambda,
+    silk_float           limit,
+    opus_int             order
+) {
+    opus_int   i, iter, ind = 0;
+    silk_float tmp, maxabs, chirp, gain_syn, gain_ana;
+
+    /* Convert to monic coefficients */
+    for( i = order - 1; i > 0; i-- ) {
+        coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
+        coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
+    }
+    gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
+    gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
+    for( i = 0; i < order; i++ ) {
+        coefs_syn[ i ] *= gain_syn;
+        coefs_ana[ i ] *= gain_ana;
+    }
+
+    /* Limit */
+    for( iter = 0; iter < 10; iter++ ) {
+        /* Find maximum absolute value */
+        maxabs = -1.0f;
+        for( i = 0; i < order; i++ ) {
+            tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) );
+            if( tmp > maxabs ) {
+                maxabs = tmp;
+                ind = i;
+            }
+        }
+        if( maxabs <= limit ) {
+            /* Coefficients are within range - done */
+            return;
+        }
+
+        /* Convert back to true warped coefficients */
+        for( i = 1; i < order; i++ ) {
+            coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ];
+            coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ];
+        }
+        gain_syn = 1.0f / gain_syn;
+        gain_ana = 1.0f / gain_ana;
+        for( i = 0; i < order; i++ ) {
+            coefs_syn[ i ] *= gain_syn;
+            coefs_ana[ i ] *= gain_ana;
+        }
+
+        /* Apply bandwidth expansion */
+        chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) );
+        silk_bwexpander_FLP( coefs_syn, order, chirp );
+        silk_bwexpander_FLP( coefs_ana, order, chirp );
+
+        /* Convert to monic warped coefficients */
+        for( i = order - 1; i > 0; i-- ) {
+            coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
+            coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
+        }
+        gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
+        gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
+        for( i = 0; i < order; i++ ) {
+            coefs_syn[ i ] *= gain_syn;
+            coefs_ana[ i ] *= gain_ana;
+        }
+    }
+    silk_assert( 0 );
+}
+
+/* Compute noise shaping coefficients and initial gain values */
+void silk_noise_shape_analysis_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    const silk_float                *pitch_res,                         /* I    LPC residual from pitch analysis            */
+    const silk_float                *x                                  /* I    Input signal [frame_length + la_shape]      */
+)
+{
+    silk_shape_state_FLP *psShapeSt = &psEnc->sShape;
+    opus_int     k, nSamples;
+    silk_float   SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt;
+    silk_float   nrg, pre_nrg, log_energy, log_energy_prev, energy_variation;
+    silk_float   delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping;
+    silk_float   x_windowed[ SHAPE_LPC_WIN_MAX ];
+    silk_float   auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
+    const silk_float *x_ptr, *pitch_res_ptr;
+
+    /* Point to start of first LPC analysis block */
+    x_ptr = x - psEnc->sCmn.la_shape;
+
+    /****************/
+    /* GAIN CONTROL */
+    /****************/
+    SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f );
+
+    /* Input quality is the average of the quality in the lowest two VAD bands */
+    psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f );
+
+    /* Coding quality level, between 0.0 and 1.0 */
+    psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) );
+
+    if( psEnc->sCmn.useCBR == 0 ) {
+        /* Reduce coding SNR during low speech activity */
+        b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+        SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b;
+    }
+
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce gains for periodic signals */
+        SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr;
+    } else {
+        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+        SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality );
+    }
+
+    /*************************/
+    /* SPARSENESS PROCESSING */
+    /*************************/
+    /* Set quantizer offset */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Initially set to 0; may be overruled in process_gains(..) */
+        psEnc->sCmn.indices.quantOffsetType = 0;
+        psEncCtrl->sparseness = 0.0f;
+    } else {
+        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+        nSamples = 2 * psEnc->sCmn.fs_kHz;
+        energy_variation = 0.0f;
+        log_energy_prev  = 0.0f;
+        pitch_res_ptr = pitch_res;
+        for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+            nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples );
+            log_energy = silk_log2( nrg );
+            if( k > 0 ) {
+                energy_variation += silk_abs_float( log_energy - log_energy_prev );
+            }
+            log_energy_prev = log_energy;
+            pitch_res_ptr += nSamples;
+        }
+        psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) );
+
+        /* Set quantization offset depending on sparseness measure */
+        if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+
+        /* Increase coding SNR for sparse signals */
+        SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f );
+    }
+
+    /*******************************/
+    /* Control bandwidth expansion */
+    /*******************************/
+    /* More BWE for signals with high prediction gain */
+    strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain;           /* between 0.0 and 1.0 */
+    BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength );
+    delta  = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality );
+    BWExp1 -= delta;
+    BWExp2 += delta;
+    /* BWExp1 will be applied after BWExp2, so make it relative */
+    BWExp1 /= BWExp2;
+
+    if( psEnc->sCmn.warping_Q16 > 0 ) {
+        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+        warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality;
+    } else {
+        warping = 0.0f;
+    }
+
+    /********************************************/
+    /* Compute noise shaping AR coefs and gains */
+    /********************************************/
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Apply window: sine slope followed by flat part followed by cosine slope */
+        opus_int shift, slope_part, flat_part;
+        flat_part = psEnc->sCmn.fs_kHz * 3;
+        slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2;
+
+        silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part );
+        shift = slope_part;
+        silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) );
+        shift += flat_part;
+        silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+        /* Update pointer: next LPC analysis block */
+        x_ptr += psEnc->sCmn.subfr_length;
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Calculate warped auto correlation */
+            silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping,
+                psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+        } else {
+            /* Calculate regular auto correlation */
+            silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
+        }
+
+        /* Add white noise, as a fraction of energy */
+        auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION;
+
+        /* Convert correlations to prediction coefficients, and compute residual energy */
+        nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder );
+        psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg );
+
+        if( psEnc->sCmn.warping_Q16 > 0 ) {
+            /* Adjust gain for warping */
+            psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder );
+        }
+
+        /* Bandwidth expansion for synthesis filter shaping */
+        silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 );
+
+        /* Compute noise shaping filter coefficients */
+        silk_memcpy(
+            &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
+            &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ],
+            psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) );
+
+        /* Bandwidth expansion for analysis filter shaping */
+        silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 );
+
+        /* Ratio of prediction gains, in energy domain */
+        pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
+        nrg     = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
+        psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg );
+
+        /* Convert to monic warped prediction coefficients and limit absolute values */
+        warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
+            warping, 3.999f, psEnc->sCmn.shapingLPCOrder );
+    }
+
+    /*****************/
+    /* Gain tweaking */
+    /*****************/
+    /* Increase gains during low speech activity */
+    gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB );
+    gain_add  = (silk_float)pow( 2.0f,  0.16f * MIN_QGAIN_DB );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->Gains[ k ] *= gain_mult;
+        psEncCtrl->Gains[ k ] += gain_add;
+    }
+
+    gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT;
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->GainsPre[ k ] *= gain_mult;
+    }
+
+    /************************************************/
+    /* Control low-frequency shaping and noise tilt */
+    /************************************************/
+    /* Less low frequency shaping for noisy inputs */
+    strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) );
+    strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+        /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ];
+            psEncCtrl->LF_MA_shp[ k ] = -1.0f + b;
+            psEncCtrl->LF_AR_shp[ k ] =  1.0f - b - b * strength;
+        }
+        Tilt = - HP_NOISE_COEF -
+            (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f );
+    } else {
+        b = 1.3f / psEnc->sCmn.fs_kHz;
+        psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b;
+        psEncCtrl->LF_AR_shp[ 0 ] =  1.0f - b - b * strength * 0.6f;
+        for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ];
+            psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ];
+        }
+        Tilt = -HP_NOISE_COEF;
+    }
+
+    /****************************/
+    /* HARMONIC SHAPING CONTROL */
+    /****************************/
+    /* Control boosting of harmonic frequencies */
+    HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr;
+
+    /* More harmonic boost for noisy input signals */
+    HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality );
+
+    if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /* Harmonic noise shaping */
+        HarmShapeGain = HARMONIC_SHAPING;
+
+        /* More harmonic noise shaping for high bitrates or noisy input */
+        HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING *
+            ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality );
+
+        /* Less harmonic noise shaping for less periodic signals */
+        HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr );
+    } else {
+        HarmShapeGain = 0.0f;
+    }
+
+    /*************************/
+    /* Smooth over subframes */
+    /*************************/
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psShapeSt->HarmBoost_smth     += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth );
+        psEncCtrl->HarmBoost[ k ]      = psShapeSt->HarmBoost_smth;
+        psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth );
+        psEncCtrl->HarmShapeGain[ k ]  = psShapeSt->HarmShapeGain_smth;
+        psShapeSt->Tilt_smth          += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth );
+        psEncCtrl->Tilt[ k ]           = psShapeSt->Tilt_smth;
+    }
+}
diff --git a/drivers/opus/silk/float/pitch_analysis_core_FLP.c b/drivers/opus/silk/float/pitch_analysis_core_FLP.c
new file mode 100644
index 0000000000..2588094c49
--- /dev/null
+++ b/drivers/opus/silk/float/pitch_analysis_core_FLP.c
@@ -0,0 +1,630 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/*****************************************************************************
+* Pitch analyser function
+******************************************************************************/
+#include "SigProc_FLP.h"
+#include "SigProc_FIX.h"
+#include "pitch_est_defines.h"
+#include "pitch.h"
+
+#define SCRATCH_SIZE        22
+
+/************************************************************/
+/* Internally used functions                                */
+/************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
+);
+
+static void silk_P_Ana_calc_energy_st3(
+    silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity          /* I Complexity setting                                             */
+);
+
+/************************************************************/
+/* CORE PITCH ANALYSIS FUNCTION                             */
+/************************************************************/
+opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const silk_float    *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int            *pitch_out,         /* O    Pitch lag values [nb_subfr]                                 */
+    opus_int16          *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8           *contourIndex,      /* O    Pitch contour Index                                         */
+    silk_float          *LTPCorr,           /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int            prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const silk_float    search_thres1,      /* I    First stage threshold for lag candidates 0 - 1              */
+    const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
+    const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
+)
+{
+    opus_int   i, k, d, j;
+    silk_float frame_8kHz[  PE_MAX_FRAME_LENGTH_MS * 8 ];
+    silk_float frame_4kHz[  PE_MAX_FRAME_LENGTH_MS * 4 ];
+    opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ];
+    opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ];
+    opus_int32 filt_state[ 6 ];
+    silk_float threshold, contour_bias;
+    silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];
+    opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];
+    silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];
+    const silk_float *target_ptr, *basis_ptr;
+    double    cross_corr, normalizer, energy, energy_tmp;
+    opus_int   d_srch[ PE_D_SRCH_LENGTH ];
+    opus_int16 d_comp[ (PE_MAX_LAG >> 1) + 5 ];
+    opus_int   length_d_srch, length_d_comp;
+    silk_float Cmax, CCmax, CCmax_b, CCmax_new_b, CCmax_new;
+    opus_int   CBimax, CBimax_new, lag, start_lag, end_lag, lag_new;
+    opus_int   cbk_size;
+    silk_float lag_log2, prevLag_log2, delta_lag_log2_sqr;
+    silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
+    silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
+    opus_int   lag_counter;
+    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
+    opus_int   sf_length, sf_length_8kHz, sf_length_4kHz;
+    opus_int   min_lag, min_lag_8kHz, min_lag_4kHz;
+    opus_int   max_lag, max_lag_8kHz, max_lag_4kHz;
+    opus_int   nb_cbk_search;
+    const opus_int8 *Lag_CB_ptr;
+
+    /* Check for valid sampling frequency */
+    silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
+
+    /* Check for valid complexity setting */
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f );
+    silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f );
+
+    /* Set up frame lengths max / min lag for the sampling frequency */
+    frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
+    frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
+    frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
+    sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
+    sf_length_4kHz    = PE_SUBFR_LENGTH_MS * 4;
+    sf_length_8kHz    = PE_SUBFR_LENGTH_MS * 8;
+    min_lag           = PE_MIN_LAG_MS * Fs_kHz;
+    min_lag_4kHz      = PE_MIN_LAG_MS * 4;
+    min_lag_8kHz      = PE_MIN_LAG_MS * 8;
+    max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
+    max_lag_4kHz      = PE_MAX_LAG_MS * 4;
+    max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;
+
+    /* Resample from input sampled at Fs_kHz to 8 kHz */
+    if( Fs_kHz == 16 ) {
+        /* Resample to 16 -> 8 khz */
+        opus_int16 frame_16_FIX[ 16 * PE_MAX_FRAME_LENGTH_MS ];
+        silk_float2short_array( frame_16_FIX, frame, frame_length );
+        silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+        silk_resampler_down2( filt_state, frame_8_FIX, frame_16_FIX, frame_length );
+        silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz );
+    } else if( Fs_kHz == 12 ) {
+        /* Resample to 12 -> 8 khz */
+        opus_int16 frame_12_FIX[ 12 * PE_MAX_FRAME_LENGTH_MS ];
+        silk_float2short_array( frame_12_FIX, frame, frame_length );
+        silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
+        silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length );
+        silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz );
+    } else {
+        silk_assert( Fs_kHz == 8 );
+        silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz );
+    }
+
+    /* Decimate again to 4 kHz */
+    silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
+    silk_resampler_down2( filt_state, frame_4_FIX, frame_8_FIX, frame_length_8kHz );
+    silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz );
+
+    /* Low-pass filter */
+    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
+        frame_4kHz[ i ] += frame_4kHz[ i - 1 ];
+    }
+
+    /******************************************************************************
+    * FIRST STAGE, operating in 4 khz
+    ******************************************************************************/
+    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
+    target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
+    for( k = 0; k < nb_subfr >> 1; k++ ) {
+        /* Check that we are within range of the array */
+        silk_assert( target_ptr >= frame_4kHz );
+        silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+        basis_ptr = target_ptr - min_lag_4kHz;
+
+        /* Check that we are within range of the array */
+        silk_assert( basis_ptr >= frame_4kHz );
+        silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
+
+        /* Calculate first vector products before loop */
+        cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
+        normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 
+                     silk_energy_FLP( basis_ptr,  sf_length_8kHz ) + 
+                     sf_length_8kHz * 4000.0f;
+
+        C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
+
+        /* From now on normalizer is computed recursively */
+        for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
+            basis_ptr--;
+
+            /* Check that we are within range of the array */
+            silk_assert( basis_ptr >= frame_4kHz );
+            silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+
+            cross_corr = xcorr[ max_lag_4kHz - d ];
+
+            /* Add contribution of new sample and remove contribution from oldest sample */
+            normalizer +=
+                basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] -
+                basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ];
+            C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer );
+        }
+        /* Update target pointer */
+        target_ptr += sf_length_8kHz;
+    }
+
+    /* Apply short-lag bias */
+    for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
+        C[ 0 ][ i ] -= C[ 0 ][ i ] * i / 4096.0f;
+    }
+
+    /* Sort */
+    length_d_srch = 4 + 2 * complexity;
+    silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
+    silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );
+
+    /* Escape if correlation is very low already here */
+    Cmax = C[ 0 ][ min_lag_4kHz ];
+    if( Cmax < 0.2f ) {
+        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
+        *LTPCorr      = 0.0f;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        return 1;
+    }
+
+    threshold = search_thres1 * Cmax;
+    for( i = 0; i < length_d_srch; i++ ) {
+        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
+        if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) {
+            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 );
+        } else {
+            length_d_srch = i;
+            break;
+        }
+    }
+    silk_assert( length_d_srch > 0 );
+
+    for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) {
+        d_comp[ i ] = 0;
+    }
+    for( i = 0; i < length_d_srch; i++ ) {
+        d_comp[ d_srch[ i ] ] = 1;
+    }
+
+    /* Convolution */
+    for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
+        d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ];
+    }
+
+    length_d_srch = 0;
+    for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) {
+        if( d_comp[ i + 1 ] > 0 ) {
+            d_srch[ length_d_srch ] = i;
+            length_d_srch++;
+        }
+    }
+
+    /* Convolution */
+    for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
+        d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ];
+    }
+
+    length_d_comp = 0;
+    for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) {
+        if( d_comp[ i ] > 0 ) {
+            d_comp[ length_d_comp ] = (opus_int16)( i - 2 );
+            length_d_comp++;
+        }
+    }
+
+    /**********************************************************************************
+    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
+    *************************************************************************************/
+    /*********************************************************************************
+    * Find energy of each subframe projected onto its history, for a range of delays
+    *********************************************************************************/
+    silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float));
+
+    if( Fs_kHz == 8 ) {
+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * 8 ];
+    } else {
+        target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
+    }
+    for( k = 0; k < nb_subfr; k++ ) {
+        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0;
+        for( j = 0; j < length_d_comp; j++ ) {
+            d = d_comp[ j ];
+            basis_ptr = target_ptr - d;
+            cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );
+            if( cross_corr > 0.0f ) {
+                energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
+                C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );
+            } else {
+                C[ k ][ d ] = 0.0f;
+            }
+        }
+        target_ptr += sf_length_8kHz;
+    }
+
+    /* search over lag range and lags codebook */
+    /* scale factor for lag codebook, as a function of center lag */
+
+    CCmax   = 0.0f; /* This value doesn't matter */
+    CCmax_b = -1000.0f;
+
+    CBimax = 0; /* To avoid returning undefined lag values */
+    lag = -1;   /* To check if lag with strong enough correlation has been found */
+
+    if( prevLag > 0 ) {
+        if( Fs_kHz == 12 ) {
+            prevLag = silk_LSHIFT( prevLag, 1 ) / 3;
+        } else if( Fs_kHz == 16 ) {
+            prevLag = silk_RSHIFT( prevLag, 1 );
+        }
+        prevLag_log2 = silk_log2( (silk_float)prevLag );
+    } else {
+        prevLag_log2 = 0;
+    }
+
+    /* Set up stage 2 codebook based on number of subframes */
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        cbk_size   = PE_NB_CBKS_STAGE2_EXT;
+        Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
+        if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
+            /* If input is 8 khz use a larger codebook here because it is last stage */
+            nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
+        } else {
+            nb_cbk_search = PE_NB_CBKS_STAGE2;
+        }
+    } else {
+        cbk_size       = PE_NB_CBKS_STAGE2_10MS;
+        Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
+        nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
+    }
+
+    for( k = 0; k < length_d_srch; k++ ) {
+        d = d_srch[ k ];
+        for( j = 0; j < nb_cbk_search; j++ ) {
+            CC[j] = 0.0f;
+            for( i = 0; i < nb_subfr; i++ ) {
+                /* Try all codebooks */
+                CC[ j ] += C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )];
+            }
+        }
+        /* Find best codebook */
+        CCmax_new  = -1000.0f;
+        CBimax_new = 0;
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            if( CC[ i ] > CCmax_new ) {
+                CCmax_new = CC[ i ];
+                CBimax_new = i;
+            }
+        }
+
+        /* Bias towards shorter lags */
+        lag_log2 = silk_log2( (silk_float)d );
+        CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
+
+        /* Bias towards previous lag */
+        if( prevLag > 0 ) {
+            delta_lag_log2_sqr = lag_log2 - prevLag_log2;
+            delta_lag_log2_sqr *= delta_lag_log2_sqr;
+            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f );
+        }
+
+        if( CCmax_new_b > CCmax_b &&                /* Find maximum biased correlation                  */
+            CCmax_new > nb_subfr * search_thres2    /* Correlation needs to be high enough to be voiced */
+        ) {
+            CCmax_b = CCmax_new_b;
+            CCmax   = CCmax_new;
+            lag     = d;
+            CBimax  = CBimax_new;
+        }
+    }
+
+    if( lag == -1 ) {
+        /* No suitable candidate found */
+        silk_memset( pitch_out, 0, PE_MAX_NB_SUBFR * sizeof(opus_int) );
+        *LTPCorr      = 0.0f;
+        *lagIndex     = 0;
+        *contourIndex = 0;
+        return 1;
+    }
+
+    /* Output normalized correlation */
+    *LTPCorr = (silk_float)( CCmax / nb_subfr );
+    silk_assert( *LTPCorr >= 0.0f );
+
+    if( Fs_kHz > 8 ) {
+        /* Search in original signal */
+
+        /* Compensate for decimation */
+        silk_assert( lag == silk_SAT16( lag ) );
+        if( Fs_kHz == 12 ) {
+            lag = silk_RSHIFT_ROUND( silk_SMULBB( lag, 3 ), 1 );
+        } else { /* Fs_kHz == 16 */
+            lag = silk_LSHIFT( lag, 1 );
+        }
+
+        lag = silk_LIMIT_int( lag, min_lag, max_lag );
+        start_lag = silk_max_int( lag - 2, min_lag );
+        end_lag   = silk_min_int( lag + 2, max_lag );
+        lag_new   = lag;                                    /* to avoid undefined lag */
+        CBimax    = 0;                                      /* to avoid undefined lag */
+
+        CCmax = -1000.0f;
+
+        /* Calculate the correlations and energies needed in stage 3 */
+        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
+        silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+
+        lag_counter = 0;
+        silk_assert( lag == silk_SAT16( lag ) );
+        contour_bias = PE_FLATCONTOUR_BIAS / lag;
+
+        /* Set up cbk parameters according to complexity setting and frame length */
+        if( nb_subfr == PE_MAX_NB_SUBFR ) {
+            nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
+            cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+            Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        } else {
+            nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+            cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+            Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        }
+
+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+        energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0;
+        for( d = start_lag; d <= end_lag; d++ ) {
+            for( j = 0; j < nb_cbk_search; j++ ) {
+                cross_corr = 0.0;
+                energy = energy_tmp;
+                for( k = 0; k < nb_subfr; k++ ) {
+                    cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ];
+                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];
+                }
+                if( cross_corr > 0.0 ) {
+                    CCmax_new = (silk_float)( 2 * cross_corr / energy );
+                    /* Reduce depending on flatness of contour */
+                    CCmax_new *= 1.0f - contour_bias * j;
+                } else {
+                    CCmax_new = 0.0f;
+                }
+
+                if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
+                    CCmax   = CCmax_new;
+                    lag_new = d;
+                    CBimax  = j;
+                }
+            }
+            lag_counter++;
+        }
+
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
+        }
+        *lagIndex = (opus_int16)( lag_new - min_lag );
+        *contourIndex = (opus_int8)CBimax;
+    } else {        /* Fs_kHz == 8 */
+        /* Save Lags */
+        for( k = 0; k < nb_subfr; k++ ) {
+            pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
+        }
+        *lagIndex = (opus_int16)( lag - min_lag_8kHz );
+        *contourIndex = (opus_int8)CBimax;
+    }
+    silk_assert( *lagIndex >= 0 );
+    /* return as voiced */
+    return 0;
+}
+
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correltions
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
+static void silk_P_Ana_calc_corr_st3(
+    silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
+)
+{
+    const silk_float *target_ptr;
+    opus_int   i, j, k, lag_counter, lag_low, lag_high;
+    opus_int   nb_cbk_search, delta, idx, cbk_size;
+    silk_float scratch_mem[ SCRATCH_SIZE ];
+    opus_val32 xcorr[ SCRATCH_SIZE ];
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the correlations for each subframe */
+        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch );
+        for( j = lag_low; j <= lag_high; j++ ) {
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for */
+            /* each code_book vector for each start lag */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
+            }
+        }
+        target_ptr += sf_length;
+    }
+}
+
+/********************************************************************/
+/* Calculate the energies for first two subframes. The energies are */
+/* calculated recursively.                                          */
+/********************************************************************/
+static void silk_P_Ana_calc_energy_st3(
+    silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity          /* I Complexity setting                                             */
+)
+{
+    const silk_float *target_ptr, *basis_ptr;
+    double    energy;
+    opus_int   k, i, j, lag_counter;
+    opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
+    silk_float scratch_mem[ SCRATCH_SIZE ];
+    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
+
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
+
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
+        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
+        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
+        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
+    } else {
+        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
+        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
+        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
+        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
+        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
+    }
+
+    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
+    for( k = 0; k < nb_subfr; k++ ) {
+        lag_counter = 0;
+
+        /* Calculate the energy for first lag */
+        basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
+        energy = silk_energy_FLP( basis_ptr, sf_length ) + 1e-3;
+        silk_assert( energy >= 0.0 );
+        scratch_mem[lag_counter] = (silk_float)energy;
+        lag_counter++;
+
+        lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
+        for( i = 1; i < lag_diff; i++ ) {
+            /* remove part outside new window */
+            energy -= basis_ptr[sf_length - i] * (double)basis_ptr[sf_length - i];
+            silk_assert( energy >= 0.0 );
+
+            /* add part that comes into window */
+            energy += basis_ptr[ -i ] * (double)basis_ptr[ -i ];
+            silk_assert( energy >= 0.0 );
+            silk_assert( lag_counter < SCRATCH_SIZE );
+            scratch_mem[lag_counter] = (silk_float)energy;
+            lag_counter++;
+        }
+
+        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
+        for( i = 0; i < nb_cbk_search; i++ ) {
+            /* Fill out the 3 dim array that stores the correlations for    */
+            /* each code_book vector for each start lag                     */
+            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
+            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
+                silk_assert( idx + j < SCRATCH_SIZE );
+                silk_assert( idx + j < lag_counter );
+                energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
+                silk_assert( energies_st3[ k ][ i ][ j ] >= 0.0f );
+            }
+        }
+        target_ptr += sf_length;
+    }
+}
diff --git a/drivers/opus/silk/float/prefilter_FLP.c b/drivers/opus/silk/float/prefilter_FLP.c
new file mode 100644
index 0000000000..aa43852ff1
--- /dev/null
+++ b/drivers/opus/silk/float/prefilter_FLP.c
@@ -0,0 +1,206 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/*
+* Prefilter for finding Quantizer input signal
+*/
+static OPUS_INLINE void silk_prefilt_FLP(
+    silk_prefilter_state_FLP    *P,                 /* I/O state */
+    silk_float                  st_res[],           /* I */
+    silk_float                  xw[],               /* O */
+    silk_float                  *HarmShapeFIR,      /* I */
+    silk_float                  Tilt,               /* I */
+    silk_float                  LF_MA_shp,          /* I */
+    silk_float                  LF_AR_shp,          /* I */
+    opus_int                    lag,                /* I */
+    opus_int                    length              /* I */
+);
+
+static void silk_warped_LPC_analysis_filter_FLP(
+          silk_float                 state[],            /* I/O  State [order + 1]                       */
+          silk_float                 res[],              /* O    Residual signal [length]                */
+    const silk_float                 coef[],             /* I    Coefficients [order]                    */
+    const silk_float                 input[],            /* I    Input signal [length]                   */
+    const silk_float                 lambda,             /* I    Warping factor                          */
+    const opus_int                   length,             /* I    Length of input signal                  */
+    const opus_int                   order               /* I    Filter order (even)                     */
+)
+{
+    opus_int     n, i;
+    silk_float   acc, tmp1, tmp2;
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+
+    for( n = 0; n < length; n++ ) {
+        /* Output of lowpass section */
+        tmp2 = state[ 0 ] + lambda * state[ 1 ];
+        state[ 0 ] = input[ n ];
+        /* Output of allpass section */
+        tmp1 = state[ 1 ] + lambda * ( state[ 2 ] - tmp2 );
+        state[ 1 ] = tmp2;
+        acc = coef[ 0 ] * tmp2;
+        /* Loop over allpass sections */
+        for( i = 2; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = state[ i ] + lambda * ( state[ i + 1 ] - tmp1 );
+            state[ i ] = tmp1;
+            acc += coef[ i - 1 ] * tmp1;
+            /* Output of allpass section */
+            tmp1 = state[ i + 1 ] + lambda * ( state[ i + 2 ] - tmp2 );
+            state[ i + 1 ] = tmp2;
+            acc += coef[ i ] * tmp2;
+        }
+        state[ order ] = tmp1;
+        acc += coef[ order - 1 ] * tmp1;
+        res[ n ] = input[ n ] - acc;
+    }
+}
+
+/*
+* silk_prefilter. Main prefilter function
+*/
+void silk_prefilter_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    const silk_encoder_control_FLP  *psEncCtrl,                         /* I    Encoder control FLP                         */
+    silk_float                      xw[],                               /* O    Weighted signal                             */
+    const silk_float                x[]                                 /* I    Speech signal                               */
+)
+{
+    silk_prefilter_state_FLP *P = &psEnc->sPrefilt;
+    opus_int   j, k, lag;
+    silk_float HarmShapeGain, Tilt, LF_MA_shp, LF_AR_shp;
+    silk_float B[ 2 ];
+    const silk_float *AR1_shp;
+    const silk_float *px;
+    silk_float *pxw;
+    silk_float HarmShapeFIR[ 3 ];
+    silk_float st_res[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
+
+    /* Set up pointers */
+    px  = x;
+    pxw = xw;
+    lag = P->lagPrev;
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Update Variables that change per sub frame */
+        if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+            lag = psEncCtrl->pitchL[ k ];
+        }
+
+        /* Noise shape parameters */
+        HarmShapeGain = psEncCtrl->HarmShapeGain[ k ] * ( 1.0f - psEncCtrl->HarmBoost[ k ] );
+        HarmShapeFIR[ 0 ] = 0.25f               * HarmShapeGain;
+        HarmShapeFIR[ 1 ] = 32767.0f / 65536.0f * HarmShapeGain;
+        HarmShapeFIR[ 2 ] = 0.25f               * HarmShapeGain;
+        Tilt      =  psEncCtrl->Tilt[ k ];
+        LF_MA_shp =  psEncCtrl->LF_MA_shp[ k ];
+        LF_AR_shp =  psEncCtrl->LF_AR_shp[ k ];
+        AR1_shp   = &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Short term FIR filtering */
+        silk_warped_LPC_analysis_filter_FLP( P->sAR_shp, st_res, AR1_shp, px,
+            (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+
+        /* Reduce (mainly) low frequencies during harmonic emphasis */
+        B[ 0 ] =  psEncCtrl->GainsPre[ k ];
+        B[ 1 ] = -psEncCtrl->GainsPre[ k ] *
+            ( psEncCtrl->HarmBoost[ k ] * HarmShapeGain + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT );
+        pxw[ 0 ] = B[ 0 ] * st_res[ 0 ] + B[ 1 ] * P->sHarmHP;
+        for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
+            pxw[ j ] = B[ 0 ] * st_res[ j ] + B[ 1 ] * st_res[ j - 1 ];
+        }
+        P->sHarmHP = st_res[ psEnc->sCmn.subfr_length - 1 ];
+
+        silk_prefilt_FLP( P, pxw, pxw, HarmShapeFIR, Tilt, LF_MA_shp, LF_AR_shp, lag, psEnc->sCmn.subfr_length );
+
+        px  += psEnc->sCmn.subfr_length;
+        pxw += psEnc->sCmn.subfr_length;
+    }
+    P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+}
+
+/*
+* Prefilter for finding Quantizer input signal
+*/
+static OPUS_INLINE void silk_prefilt_FLP(
+    silk_prefilter_state_FLP    *P,                 /* I/O state */
+    silk_float                  st_res[],           /* I */
+    silk_float                  xw[],               /* O */
+    silk_float                  *HarmShapeFIR,      /* I */
+    silk_float                  Tilt,               /* I */
+    silk_float                  LF_MA_shp,          /* I */
+    silk_float                  LF_AR_shp,          /* I */
+    opus_int                    lag,                /* I */
+    opus_int                    length              /* I */
+)
+{
+    opus_int   i;
+    opus_int   idx, LTP_shp_buf_idx;
+    silk_float n_Tilt, n_LF, n_LTP;
+    silk_float sLF_AR_shp, sLF_MA_shp;
+    silk_float *LTP_shp_buf;
+
+    /* To speed up use temp variables instead of using the struct */
+    LTP_shp_buf     = P->sLTP_shp;
+    LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+    sLF_AR_shp      = P->sLF_AR_shp;
+    sLF_MA_shp      = P->sLF_MA_shp;
+
+    for( i = 0; i < length; i++ ) {
+        if( lag > 0 ) {
+            silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+            idx = lag + LTP_shp_buf_idx;
+            n_LTP  = LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ] * HarmShapeFIR[ 0 ];
+            n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2    ) & LTP_MASK ] * HarmShapeFIR[ 1 ];
+            n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ] * HarmShapeFIR[ 2 ];
+        } else {
+            n_LTP = 0;
+        }
+
+        n_Tilt = sLF_AR_shp * Tilt;
+        n_LF   = sLF_AR_shp * LF_AR_shp + sLF_MA_shp * LF_MA_shp;
+
+        sLF_AR_shp = st_res[ i ] - n_Tilt;
+        sLF_MA_shp = sLF_AR_shp - n_LF;
+
+        LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+        LTP_shp_buf[ LTP_shp_buf_idx ] = sLF_MA_shp;
+
+        xw[ i ] = sLF_MA_shp - n_LTP;
+    }
+    /* Copy temp variable back to state */
+    P->sLF_AR_shp       = sLF_AR_shp;
+    P->sLF_MA_shp       = sLF_MA_shp;
+    P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
diff --git a/drivers/opus/silk/float/process_gains_FLP.c b/drivers/opus/silk/float/process_gains_FLP.c
new file mode 100644
index 0000000000..e83d05552a
--- /dev/null
+++ b/drivers/opus/silk/float/process_gains_FLP.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/* Processing of gains */
+void silk_process_gains_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
+)
+{
+    silk_shape_state_FLP *psShapeSt = &psEnc->sShape;
+    opus_int     k;
+    opus_int32   pGains_Q16[ MAX_NB_SUBFR ];
+    silk_float   s, InvMaxSqrVal, gain, quant_offset;
+
+    /* Gain reduction when LTP coding gain is high */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        s = 1.0f - 0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->Gains[ k ] *= s;
+        }
+    }
+
+    /* Limit the quantized signal */
+    InvMaxSqrVal = ( silk_float )( pow( 2.0f, 0.33f * ( 21.0f - psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) ) ) / psEnc->sCmn.subfr_length );
+
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Soft limit on ratio residual energy and squared gains */
+        gain = psEncCtrl->Gains[ k ];
+        gain = ( silk_float )sqrt( gain * gain + psEncCtrl->ResNrg[ k ] * InvMaxSqrVal );
+        psEncCtrl->Gains[ k ] = silk_min_float( gain, 32767.0f );
+    }
+
+    /* Prepare gains for noise shaping quantization */
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        pGains_Q16[ k ] = (opus_int32)( psEncCtrl->Gains[ k ] * 65536.0f );
+    }
+
+    /* Save unquantized gains and gain Index */
+    silk_memcpy( psEncCtrl->GainsUnq_Q16, pGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+    psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex;
+
+    /* Quantize gains */
+    silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
+            &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+    /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        psEncCtrl->Gains[ k ] = pGains_Q16[ k ] / 65536.0f;
+    }
+
+    /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        if( psEncCtrl->LTPredCodGain + psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ) > 1.0f ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+    }
+
+    /* Quantizer boundary adjustment */
+    quant_offset = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ] / 1024.0f;
+    psEncCtrl->Lambda = LAMBDA_OFFSET
+                      + LAMBDA_DELAYED_DECISIONS * psEnc->sCmn.nStatesDelayedDecision
+                      + LAMBDA_SPEECH_ACT        * psEnc->sCmn.speech_activity_Q8 * ( 1.0f /  256.0f )
+                      + LAMBDA_INPUT_QUALITY     * psEncCtrl->input_quality
+                      + LAMBDA_CODING_QUALITY    * psEncCtrl->coding_quality
+                      + LAMBDA_QUANT_OFFSET      * quant_offset;
+
+    silk_assert( psEncCtrl->Lambda > 0.0f );
+    silk_assert( psEncCtrl->Lambda < 2.0f );
+}
diff --git a/drivers/opus/silk/float/regularize_correlations_FLP.c b/drivers/opus/silk/float/regularize_correlations_FLP.c
new file mode 100644
index 0000000000..f056eadc57
--- /dev/null
+++ b/drivers/opus/silk/float/regularize_correlations_FLP.c
@@ -0,0 +1,48 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Add noise to matrix diagonal */
+void silk_regularize_correlations_FLP(
+    silk_float                      *XX,                                /* I/O  Correlation matrices                        */
+    silk_float                      *xx,                                /* I/O  Correlation values                          */
+    const silk_float                noise,                              /* I    Noise energy to add                         */
+    const opus_int                  D                                   /* I    Dimension of XX                             */
+)
+{
+    opus_int i;
+
+    for( i = 0; i < D; i++ ) {
+        matrix_ptr( &XX[ 0 ], i, i, D ) += noise;
+    }
+    xx[ 0 ] += noise;
+}
diff --git a/drivers/opus/silk/float/residual_energy_FLP.c b/drivers/opus/silk/float/residual_energy_FLP.c
new file mode 100644
index 0000000000..011efcef04
--- /dev/null
+++ b/drivers/opus/silk/float/residual_energy_FLP.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+#define MAX_ITERATIONS_RESIDUAL_NRG         10
+#define REGULARIZATION_FACTOR               1e-8f
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
+silk_float silk_residual_energy_covar_FLP(                              /* O    Weighted residual energy                    */
+    const silk_float                *c,                                 /* I    Filter coefficients                         */
+    silk_float                      *wXX,                               /* I/O  Weighted correlation matrix, reg. out       */
+    const silk_float                *wXx,                               /* I    Weighted correlation vector                 */
+    const silk_float                wxx,                                /* I    Weighted correlation value                  */
+    const opus_int                  D                                   /* I    Dimension                                   */
+)
+{
+    opus_int   i, j, k;
+    silk_float tmp, nrg = 0.0f, regularization;
+
+    /* Safety checks */
+    silk_assert( D >= 0 );
+
+    regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] );
+    for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) {
+        nrg = wxx;
+
+        tmp = 0.0f;
+        for( i = 0; i < D; i++ ) {
+            tmp += wXx[ i ] * c[ i ];
+        }
+        nrg -= 2.0f * tmp;
+
+        /* compute c' * wXX * c, assuming wXX is symmetric */
+        for( i = 0; i < D; i++ ) {
+            tmp = 0.0f;
+            for( j = i + 1; j < D; j++ ) {
+                tmp += matrix_c_ptr( wXX, i, j, D ) * c[ j ];
+            }
+            nrg += c[ i ] * ( 2.0f * tmp + matrix_c_ptr( wXX, i, i, D ) * c[ i ] );
+        }
+        if( nrg > 0 ) {
+            break;
+        } else {
+            /* Add white noise */
+            for( i = 0; i < D; i++ ) {
+                matrix_c_ptr( wXX, i, i, D ) +=  regularization;
+            }
+            /* Increase noise for next run */
+            regularization *= 2.0f;
+        }
+    }
+    if( k == MAX_ITERATIONS_RESIDUAL_NRG ) {
+        silk_assert( nrg == 0 );
+        nrg = 1.0f;
+    }
+
+    return nrg;
+}
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order   */
+/* of preceding samples                                                                 */
+void silk_residual_energy_FLP(
+    silk_float                      nrgs[ MAX_NB_SUBFR ],               /* O    Residual energy per subframe                */
+    const silk_float                x[],                                /* I    Input signal                                */
+    silk_float                      a[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                */
+    const silk_float                gains[],                            /* I    Quantization gains                          */
+    const opus_int                  subfr_length,                       /* I    Subframe length                             */
+    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+)
+{
+    opus_int     shift;
+    silk_float   *LPC_res_ptr, LPC_res[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ];
+
+    LPC_res_ptr = LPC_res + LPC_order;
+    shift = LPC_order + subfr_length;
+
+    /* Filter input to create the LPC residual for each frame half, and measure subframe energies */
+    silk_LPC_analysis_filter_FLP( LPC_res, a[ 0 ], x + 0 * shift, 2 * shift, LPC_order );
+    nrgs[ 0 ] = ( silk_float )( gains[ 0 ] * gains[ 0 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) );
+    nrgs[ 1 ] = ( silk_float )( gains[ 1 ] * gains[ 1 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) );
+
+    if( nb_subfr == MAX_NB_SUBFR ) {
+        silk_LPC_analysis_filter_FLP( LPC_res, a[ 1 ], x + 2 * shift, 2 * shift, LPC_order );
+        nrgs[ 2 ] = ( silk_float )( gains[ 2 ] * gains[ 2 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) );
+        nrgs[ 3 ] = ( silk_float )( gains[ 3 ] * gains[ 3 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) );
+    }
+}
diff --git a/drivers/opus/silk/float/scale_copy_vector_FLP.c b/drivers/opus/silk/float/scale_copy_vector_FLP.c
new file mode 100644
index 0000000000..7578d44894
--- /dev/null
+++ b/drivers/opus/silk/float/scale_copy_vector_FLP.c
@@ -0,0 +1,57 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* copy and multiply a vector by a constant */
+void silk_scale_copy_vector_FLP(
+    silk_float          *data_out,
+    const silk_float    *data_in,
+    silk_float          gain,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+
+    /* 4x unrolled loop */
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        data_out[ i + 0 ] = gain * data_in[ i + 0 ];
+        data_out[ i + 1 ] = gain * data_in[ i + 1 ];
+        data_out[ i + 2 ] = gain * data_in[ i + 2 ];
+        data_out[ i + 3 ] = gain * data_in[ i + 3 ];
+    }
+
+    /* any remaining elements */
+    for( ; i < dataSize; i++ ) {
+        data_out[ i ] = gain * data_in[ i ];
+    }
+}
diff --git a/drivers/opus/silk/float/scale_vector_FLP.c b/drivers/opus/silk/float/scale_vector_FLP.c
new file mode 100644
index 0000000000..03345d519d
--- /dev/null
+++ b/drivers/opus/silk/float/scale_vector_FLP.c
@@ -0,0 +1,56 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+/* multiply a vector by a constant */
+void silk_scale_vector_FLP(
+    silk_float          *data1,
+    silk_float          gain,
+    opus_int            dataSize
+)
+{
+    opus_int  i, dataSize4;
+
+    /* 4x unrolled loop */
+    dataSize4 = dataSize & 0xFFFC;
+    for( i = 0; i < dataSize4; i += 4 ) {
+        data1[ i + 0 ] *= gain;
+        data1[ i + 1 ] *= gain;
+        data1[ i + 2 ] *= gain;
+        data1[ i + 3 ] *= gain;
+    }
+
+    /* any remaining elements */
+    for( ; i < dataSize; i++ ) {
+        data1[ i ] *= gain;
+    }
+}
diff --git a/drivers/opus/silk/float/schur_FLP.c b/drivers/opus/silk/float/schur_FLP.c
new file mode 100644
index 0000000000..76b87f1304
--- /dev/null
+++ b/drivers/opus/silk/float/schur_FLP.c
@@ -0,0 +1,70 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FLP.h"
+
+silk_float silk_schur_FLP(                  /* O    returns residual energy                                     */
+    silk_float          refl_coef[],        /* O    reflection coefficients (length order)                      */
+    const silk_float    auto_corr[],        /* I    autocorrelation sequence (length order+1)                   */
+    opus_int            order               /* I    order                                                       */
+)
+{
+    opus_int   k, n;
+    silk_float C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    silk_float Ctmp1, Ctmp2, rc_tmp;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Copy correlations */
+    for( k = 0; k < order+1; k++ ) {
+        C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ];
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Get reflection coefficient */
+        rc_tmp = -C[ k + 1 ][ 0 ] / silk_max_float( C[ 0 ][ 1 ], 1e-9f );
+
+        /* Save the output */
+        refl_coef[ k ] = rc_tmp;
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1 = C[ n + k + 1 ][ 0 ];
+            Ctmp2 = C[ n ][ 1 ];
+            C[ n + k + 1 ][ 0 ] = Ctmp1 + Ctmp2 * rc_tmp;
+            C[ n ][ 1 ]         = Ctmp2 + Ctmp1 * rc_tmp;
+        }
+    }
+
+    /* Return residual energy */
+    return C[ 0 ][ 1 ];
+}
+
diff --git a/drivers/opus/silk/float/solve_LS_FLP.c b/drivers/opus/silk/float/solve_LS_FLP.c
new file mode 100644
index 0000000000..9fd962b33d
--- /dev/null
+++ b/drivers/opus/silk/float/solve_LS_FLP.c
@@ -0,0 +1,207 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+#include "tuning_parameters.h"
+
+/**********************************************************************
+ * LDL Factorisation. Finds the upper triangular matrix L and the diagonal
+ * Matrix D (only the diagonal elements returned in a vector)such that
+ * the symmetric matric A is given by A = L*D*L'.
+ **********************************************************************/
+static OPUS_INLINE void silk_LDL_FLP(
+    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
+    opus_int            M,          /* I    Size of Matrix                                                  */
+    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
+    silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */
+);
+
+/**********************************************************************
+ * Function to solve linear equation Ax = b, when A is a MxM lower
+ * triangular matrix, with ones on the diagonal.
+ **********************************************************************/
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+);
+
+/**********************************************************************
+ * Function to solve linear equation (A^T)x = b, when A is a MxM lower
+ * triangular, with ones on the diagonal. (ie then A^T is upper triangular)
+ **********************************************************************/
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+);
+
+/**********************************************************************
+ * Function to solve linear equation Ax = b, when A is a MxM
+ * symmetric square matrix - using LDL factorisation
+ **********************************************************************/
+void silk_solve_LDL_FLP(
+    silk_float                      *A,                                 /* I/O  Symmetric square matrix, out: reg.          */
+    const opus_int                  M,                                  /* I    Size of matrix                              */
+    const silk_float                *b,                                 /* I    Pointer to b vector                         */
+    silk_float                      *x                                  /* O    Pointer to x solution vector                */
+)
+{
+    opus_int   i;
+    silk_float L[    MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ];
+    silk_float T[    MAX_MATRIX_SIZE ];
+    silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+
+    /***************************************************
+    Factorize A by LDL such that A = L*D*(L^T),
+    where L is lower triangular with ones on diagonal
+    ****************************************************/
+    silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv );
+
+    /****************************************************
+    * substitute D*(L^T) = T. ie:
+    L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b
+    ******************************************************/
+    silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T );
+
+    /****************************************************
+    D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is
+    diagonal just multiply with 1/d_i
+    ****************************************************/
+    for( i = 0; i < M; i++ ) {
+        T[ i ] = T[ i ] * Dinv[ i ];
+    }
+    /****************************************************
+    x = inv(L') * inv(D) * T
+    *****************************************************/
+    silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x );
+}
+
+static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+)
+{
+    opus_int   i, j;
+    silk_float temp;
+    const silk_float *ptr1;
+
+    for( i = M - 1; i >= 0; i-- ) {
+        ptr1 =  matrix_adr( L, 0, i, M );
+        temp = 0;
+        for( j = M - 1; j > i ; j-- ) {
+            temp += ptr1[ j * M ] * x[ j ];
+        }
+        temp = b[ i ] - temp;
+        x[ i ] = temp;
+    }
+}
+
+static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP(
+    const silk_float    *L,         /* I    Pointer to Lower Triangular Matrix                              */
+    opus_int            M,          /* I    Dim of Matrix equation                                          */
+    const silk_float    *b,         /* I    b Vector                                                        */
+    silk_float          *x          /* O    x Vector                                                        */
+)
+{
+    opus_int   i, j;
+    silk_float temp;
+    const silk_float *ptr1;
+
+    for( i = 0; i < M; i++ ) {
+        ptr1 =  matrix_adr( L, i, 0, M );
+        temp = 0;
+        for( j = 0; j < i; j++ ) {
+            temp += ptr1[ j ] * x[ j ];
+        }
+        temp = b[ i ] - temp;
+        x[ i ] = temp;
+    }
+}
+
+static OPUS_INLINE void silk_LDL_FLP(
+    silk_float          *A,         /* I/O  Pointer to Symetric Square Matrix                               */
+    opus_int            M,          /* I    Size of Matrix                                                  */
+    silk_float          *L,         /* I/O  Pointer to Square Upper triangular Matrix                       */
+    silk_float          *Dinv       /* I/O  Pointer to vector holding the inverse diagonal elements of D    */
+)
+{
+    opus_int i, j, k, loop_count, err = 1;
+    silk_float *ptr1, *ptr2;
+    double temp, diag_min_value;
+    silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/
+
+    silk_assert( M <= MAX_MATRIX_SIZE );
+
+    diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] );
+    for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) {
+        err = 0;
+        for( j = 0; j < M; j++ ) {
+            ptr1 = matrix_adr( L, j, 0, M );
+            temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/
+            for( i = 0; i < j; i++ ) {
+                v[ i ] = ptr1[ i ] * D[ i ];
+                temp  -= ptr1[ i ] * v[ i ];
+            }
+            if( temp < diag_min_value ) {
+                /* Badly conditioned matrix: add white noise and run again */
+                temp = ( loop_count + 1 ) * diag_min_value - temp;
+                for( i = 0; i < M; i++ ) {
+                    matrix_ptr( A, i, i, M ) += ( silk_float )temp;
+                }
+                err = 1;
+                break;
+            }
+            D[ j ]    = ( silk_float )temp;
+            Dinv[ j ] = ( silk_float )( 1.0f / temp );
+            matrix_ptr( L, j, j, M ) = 1.0f;
+
+            ptr1 = matrix_adr( A, j, 0, M );
+            ptr2 = matrix_adr( L, j + 1, 0, M);
+            for( i = j + 1; i < M; i++ ) {
+                temp = 0.0;
+                for( k = 0; k < j; k++ ) {
+                    temp += ptr2[ k ] * v[ k ];
+                }
+                matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] );
+                ptr2 += M; /* go to next column*/
+            }
+        }
+    }
+    silk_assert( err == 0 );
+}
+
diff --git a/drivers/opus/silk/float/sort_FLP.c b/drivers/opus/silk/float/sort_FLP.c
new file mode 100644
index 0000000000..58ea485116
--- /dev/null
+++ b/drivers/opus/silk/float/sort_FLP.c
@@ -0,0 +1,83 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Insertion sort (fast for already almost sorted arrays):  */
+/* Best case:  O(n)   for an already sorted array           */
+/* Worst case: O(n^2) for an inversely sorted array         */
+
+#include "typedef.h"
+#include "SigProc_FLP.h"
+
+void silk_insertion_sort_decreasing_FLP(
+    silk_float          *a,                 /* I/O  Unsorted / Sorted vector                                    */
+    opus_int            *idx,               /* O    Index vector for the sorted elements                        */
+    const opus_int      L,                  /* I    Vector length                                               */
+    const opus_int      K                   /* I    Number of correctly sorted positions                        */
+)
+{
+    silk_float value;
+    opus_int   i, j;
+
+    /* Safety checks */
+    silk_assert( K >  0 );
+    silk_assert( L >  0 );
+    silk_assert( L >= K );
+
+    /* Write start indices in index vector */
+    for( i = 0; i < K; i++ ) {
+        idx[ i ] = i;
+    }
+
+    /* Sort vector elements by value, decreasing order */
+    for( i = 1; i < K; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+            a[ j + 1 ]   = a[ j ];      /* Shift value */
+            idx[ j + 1 ] = idx[ j ];    /* Shift index */
+        }
+        a[ j + 1 ]   = value;   /* Write value */
+        idx[ j + 1 ] = i;       /* Write index */
+    }
+
+    /* If less than L values are asked check the remaining values,      */
+    /* but only spend CPU to ensure that the K first values are correct */
+    for( i = K; i < L; i++ ) {
+        value = a[ i ];
+        if( value > a[ K - 1 ] ) {
+            for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+                a[ j + 1 ]   = a[ j ];      /* Shift value */
+                idx[ j + 1 ] = idx[ j ];    /* Shift index */
+            }
+            a[ j + 1 ]   = value;   /* Write value */
+            idx[ j + 1 ] = i;       /* Write index */
+        }
+    }
+}
diff --git a/drivers/opus/silk/float/structs_FLP.h b/drivers/opus/silk/float/structs_FLP.h
new file mode 100644
index 0000000000..4082914d93
--- /dev/null
+++ b/drivers/opus/silk/float/structs_FLP.h
@@ -0,0 +1,131 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_FLP_H
+#define SILK_STRUCTS_FLP_H
+
+#include "typedef.h"
+#include "silk_main.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/********************************/
+/* Noise shaping analysis state */
+/********************************/
+typedef struct {
+    opus_int8                   LastGainIndex;
+    silk_float                  HarmBoost_smth;
+    silk_float                  HarmShapeGain_smth;
+    silk_float                  Tilt_smth;
+} silk_shape_state_FLP;
+
+/********************************/
+/* Prefilter state              */
+/********************************/
+typedef struct {
+    silk_float                  sLTP_shp[ LTP_BUF_LENGTH ];
+    silk_float                  sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ];
+    opus_int                    sLTP_shp_buf_idx;
+    silk_float                  sLF_AR_shp;
+    silk_float                  sLF_MA_shp;
+    silk_float                  sHarmHP;
+    opus_int32                  rand_seed;
+    opus_int                    lagPrev;
+} silk_prefilter_state_FLP;
+
+/********************************/
+/* Encoder state FLP            */
+/********************************/
+typedef struct {
+    silk_encoder_state          sCmn;                               /* Common struct, shared with fixed-point code */
+    silk_shape_state_FLP        sShape;                             /* Noise shaping state */
+    silk_prefilter_state_FLP    sPrefilt;                           /* Prefilter State */
+
+    /* Buffer for find pitch and noise shape analysis */
+    silk_float                  x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */
+    silk_float                  LTPCorr;                            /* Normalized correlation from pitch lag estimator */
+} silk_encoder_state_FLP;
+
+/************************/
+/* Encoder control FLP  */
+/************************/
+typedef struct {
+    /* Prediction and coding parameters */
+    silk_float                  Gains[ MAX_NB_SUBFR ];
+    silk_float                  PredCoef[ 2 ][ MAX_LPC_ORDER ];     /* holds interpolated and final coefficients */
+    silk_float                  LTPCoef[LTP_ORDER * MAX_NB_SUBFR];
+    silk_float                  LTP_scale;
+    opus_int                    pitchL[ MAX_NB_SUBFR ];
+
+    /* Noise shaping parameters */
+    silk_float                  AR1[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    silk_float                  AR2[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    silk_float                  LF_MA_shp[     MAX_NB_SUBFR ];
+    silk_float                  LF_AR_shp[     MAX_NB_SUBFR ];
+    silk_float                  GainsPre[      MAX_NB_SUBFR ];
+    silk_float                  HarmBoost[     MAX_NB_SUBFR ];
+    silk_float                  Tilt[          MAX_NB_SUBFR ];
+    silk_float                  HarmShapeGain[ MAX_NB_SUBFR ];
+    silk_float                  Lambda;
+    silk_float                  input_quality;
+    silk_float                  coding_quality;
+
+    /* Measures */
+    silk_float                  sparseness;
+    silk_float                  predGain;
+    silk_float                  LTPredCodGain;
+    silk_float                  ResNrg[ MAX_NB_SUBFR ];             /* Residual energy per subframe */
+
+    /* Parameters for CBR mode */
+    opus_int32                  GainsUnq_Q16[ MAX_NB_SUBFR ];
+    opus_int8                   lastGainIndexPrev;
+} silk_encoder_control_FLP;
+
+/************************/
+/* Encoder Super Struct */
+/************************/
+typedef struct {
+    silk_encoder_state_FLP      state_Fxx[ ENCODER_NUM_CHANNELS ];
+    stereo_enc_state            sStereo;
+    opus_int32                  nBitsExceeded;
+    opus_int                    nChannelsAPI;
+    opus_int                    nChannelsInternal;
+    opus_int                    nPrevChannelsInternal;
+    opus_int                    timeSinceSwitchAllowed_ms;
+    opus_int                    allowBandwidthSwitch;
+    opus_int                    prev_decode_only_middle;
+} silk_encoder;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/float/warped_autocorrelation_FLP.c b/drivers/opus/silk/float/warped_autocorrelation_FLP.c
new file mode 100644
index 0000000000..6075dfe8d3
--- /dev/null
+++ b/drivers/opus/silk/float/warped_autocorrelation_FLP.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Autocorrelations for a warped frequency axis */
+void silk_warped_autocorrelation_FLP(
+    silk_float                      *corr,                              /* O    Result [order + 1]                          */
+    const silk_float                *input,                             /* I    Input data to correlate                     */
+    const silk_float                warping,                            /* I    Warping coefficient                         */
+    const opus_int                  length,                             /* I    Length of input                             */
+    const opus_int                  order                               /* I    Correlation order (even)                    */
+)
+{
+    opus_int    n, i;
+    double      tmp1, tmp2;
+    double      state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+    double      C[     MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+
+    /* Order must be even */
+    silk_assert( ( order & 1 ) == 0 );
+
+    /* Loop over samples */
+    for( n = 0; n < length; n++ ) {
+        tmp1 = input[ n ];
+        /* Loop over allpass sections */
+        for( i = 0; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = state[ i ] + warping * ( state[ i + 1 ] - tmp1 );
+            state[ i ] = tmp1;
+            C[ i ] += state[ 0 ] * tmp1;
+            /* Output of allpass section */
+            tmp1 = state[ i + 1 ] + warping * ( state[ i + 2 ] - tmp2 );
+            state[ i + 1 ] = tmp2;
+            C[ i + 1 ] += state[ 0 ] * tmp2;
+        }
+        state[ order ] = tmp1;
+        C[ order ] += state[ 0 ] * tmp1;
+    }
+
+    /* Copy correlations in silk_float output format */
+    for( i = 0; i < order + 1; i++ ) {
+        corr[ i ] = ( silk_float )C[ i ];
+    }
+}
diff --git a/drivers/opus/silk/float/wrappers_FLP.c b/drivers/opus/silk/float/wrappers_FLP.c
new file mode 100644
index 0000000000..c4e34e5578
--- /dev/null
+++ b/drivers/opus/silk/float/wrappers_FLP.c
@@ -0,0 +1,201 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FLP.h"
+
+/* Wrappers. Calls flp / fix code */
+
+/* Convert AR filter coefficients to NLSF parameters */
+void silk_A2NLSF_FLP(
+    opus_int16                      *NLSF_Q15,                          /* O    NLSF vector      [ LPC_order ]              */
+    const silk_float                *pAR,                               /* I    LPC coefficients [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+)
+{
+    opus_int   i;
+    opus_int32 a_fix_Q16[ MAX_LPC_ORDER ];
+
+    for( i = 0; i < LPC_order; i++ ) {
+        a_fix_Q16[ i ] = silk_float2int( pAR[ i ] * 65536.0f );
+    }
+
+    silk_A2NLSF( NLSF_Q15, a_fix_Q16, LPC_order );
+}
+
+/* Convert LSF parameters to AR prediction filter coefficients */
+void silk_NLSF2A_FLP(
+    silk_float                      *pAR,                               /* O    LPC coefficients [ LPC_order ]              */
+    const opus_int16                *NLSF_Q15,                          /* I    NLSF vector      [ LPC_order ]              */
+    const opus_int                  LPC_order                           /* I    LPC order                                   */
+)
+{
+    opus_int   i;
+    opus_int16 a_fix_Q12[ MAX_LPC_ORDER ];
+
+    silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order );
+
+    for( i = 0; i < LPC_order; i++ ) {
+        pAR[ i ] = ( silk_float )a_fix_Q12[ i ] * ( 1.0f / 4096.0f );
+    }
+}
+
+/******************************************/
+/* Floating-point NLSF processing wrapper */
+/******************************************/
+void silk_process_NLSFs_FLP(
+    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
+    silk_float                      PredCoef[ 2 ][ MAX_LPC_ORDER ],     /* O    Prediction coefficients                     */
+    opus_int16                      NLSF_Q15[      MAX_LPC_ORDER ],     /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16                prev_NLSF_Q15[ MAX_LPC_ORDER ]      /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+)
+{
+    opus_int     i, j;
+    opus_int16   PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+
+    silk_process_NLSFs( psEncC, PredCoef_Q12, NLSF_Q15, prev_NLSF_Q15);
+
+    for( j = 0; j < 2; j++ ) {
+        for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
+            PredCoef[ j ][ i ] = ( silk_float )PredCoef_Q12[ j ][ i ] * ( 1.0f / 4096.0f );
+        }
+    }
+}
+
+/****************************************/
+/* Floating-point Silk NSQ wrapper      */
+/****************************************/
+void silk_NSQ_wrapper_FLP(
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
+    SideInfoIndices                 *psIndices,                         /* I/O  Quantization indices                        */
+    silk_nsq_state                  *psNSQ,                             /* I/O  Noise Shaping Quantzation state             */
+    opus_int8                       pulses[],                           /* O    Quantized pulse signal                      */
+    const silk_float                x[]                                 /* I    Prefiltered input signal                    */
+)
+{
+    opus_int     i, j;
+    opus_int32   x_Q3[ MAX_FRAME_LENGTH ];
+    opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
+    silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+    opus_int16   LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ];
+    opus_int     LTP_scale_Q14;
+
+    /* Noise shaping parameters */
+    opus_int16   AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
+    opus_int32   LF_shp_Q14[ MAX_NB_SUBFR ];         /* Packs two int16 coefficients per int32 value             */
+    opus_int     Lambda_Q10;
+    opus_int     Tilt_Q14[ MAX_NB_SUBFR ];
+    opus_int     HarmShapeGain_Q14[ MAX_NB_SUBFR ];
+
+    /* Convert control struct to fix control struct */
+    /* Noise shape parameters */
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        for( j = 0; j < psEnc->sCmn.shapingLPCOrder; j++ ) {
+            AR2_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR2[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f );
+        }
+    }
+
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        LF_shp_Q14[ i ] =   silk_LSHIFT32( silk_float2int( psEncCtrl->LF_AR_shp[ i ]     * 16384.0f ), 16 ) |
+                              (opus_uint16)silk_float2int( psEncCtrl->LF_MA_shp[ i ]     * 16384.0f );
+        Tilt_Q14[ i ]   =        (opus_int)silk_float2int( psEncCtrl->Tilt[ i ]          * 16384.0f );
+        HarmShapeGain_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->HarmShapeGain[ i ] * 16384.0f );
+    }
+    Lambda_Q10 = ( opus_int )silk_float2int( psEncCtrl->Lambda * 1024.0f );
+
+    /* prediction and coding parameters */
+    for( i = 0; i < psEnc->sCmn.nb_subfr * LTP_ORDER; i++ ) {
+        LTPCoef_Q14[ i ] = (opus_int16)silk_float2int( psEncCtrl->LTPCoef[ i ] * 16384.0f );
+    }
+
+    for( j = 0; j < 2; j++ ) {
+        for( i = 0; i < psEnc->sCmn.predictLPCOrder; i++ ) {
+            PredCoef_Q12[ j ][ i ] = (opus_int16)silk_float2int( psEncCtrl->PredCoef[ j ][ i ] * 4096.0f );
+        }
+    }
+
+    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
+        Gains_Q16[ i ] = silk_float2int( psEncCtrl->Gains[ i ] * 65536.0f );
+        silk_assert( Gains_Q16[ i ] > 0 );
+    }
+
+    if( psIndices->signalType == TYPE_VOICED ) {
+        LTP_scale_Q14 = silk_LTPScales_table_Q14[ psIndices->LTP_scaleIndex ];
+    } else {
+        LTP_scale_Q14 = 0;
+    }
+
+    /* Convert input to fix */
+    for( i = 0; i < psEnc->sCmn.frame_length; i++ ) {
+        x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] );
+    }
+
+    /* Call NSQ */
+    if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
+        silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
+            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+    } else {
+        silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14,
+            AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 );
+    }
+}
+
+/***********************************************/
+/* Floating-point Silk LTP quantiation wrapper */
+/***********************************************/
+void silk_quant_LTP_gains_FLP(
+    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
+    opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
+    opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
+    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain  */
+    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
+    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
+    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
+    const opus_int                  nb_subfr                            /* I    number of subframes                         */
+)
+{
+    opus_int   i;
+    opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ];
+    opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ];
+
+    for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
+        B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f );
+    }
+    for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) {
+        W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f );
+    }
+
+    silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr );
+
+    for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) {
+        B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f );
+    }
+}
diff --git a/drivers/opus/silk/gain_quant.c b/drivers/opus/silk/gain_quant.c
new file mode 100644
index 0000000000..e9467198eb
--- /dev/null
+++ b/drivers/opus/silk/gain_quant.c
@@ -0,0 +1,141 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+#define OFFSET                  ( ( MIN_QGAIN_DB * 128 ) / 6 + 16 * 128 )
+#define SCALE_Q16               ( ( 65536 * ( N_LEVELS_QGAIN - 1 ) ) / ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) )
+#define INV_SCALE_Q16           ( ( 65536 * ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) / ( N_LEVELS_QGAIN - 1 ) )
+
+/* Gain scalar quantization with hysteresis, uniform on log scale */
+void silk_gains_quant(
+    opus_int8                   ind[ MAX_NB_SUBFR ],            /* O    gain indices                                */
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* I/O  gains (quantized out)                       */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+)
+{
+    opus_int k, double_step_size_threshold;
+
+    for( k = 0; k < nb_subfr; k++ ) {
+        /* Convert to log scale, scale, floor() */
+        ind[ k ] = silk_SMULWB( SCALE_Q16, silk_lin2log( gain_Q16[ k ] ) - OFFSET );
+
+        /* Round towards previous quantized gain (hysteresis) */
+        if( ind[ k ] < *prev_ind ) {
+            ind[ k ]++;
+        }
+        ind[ k ] = silk_LIMIT_int( ind[ k ], 0, N_LEVELS_QGAIN - 1 );
+
+        /* Compute delta indices and limit */
+        if( k == 0 && conditional == 0 ) {
+            /* Full index */
+            ind[ k ] = silk_LIMIT_int( ind[ k ], *prev_ind + MIN_DELTA_GAIN_QUANT, N_LEVELS_QGAIN - 1 );
+            *prev_ind = ind[ k ];
+        } else {
+            /* Delta index */
+            ind[ k ] = ind[ k ] - *prev_ind;
+
+            /* Double the quantization step size for large gain increases, so that the max gain level can be reached */
+            double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind;
+            if( ind[ k ] > double_step_size_threshold ) {
+                ind[ k ] = double_step_size_threshold + silk_RSHIFT( ind[ k ] - double_step_size_threshold + 1, 1 );
+            }
+
+            ind[ k ] = silk_LIMIT_int( ind[ k ], MIN_DELTA_GAIN_QUANT, MAX_DELTA_GAIN_QUANT );
+
+            /* Accumulate deltas */
+            if( ind[ k ] > double_step_size_threshold ) {
+                *prev_ind += silk_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold;
+            } else {
+                *prev_ind += ind[ k ];
+            }
+
+            /* Shift to make non-negative */
+            ind[ k ] -= MIN_DELTA_GAIN_QUANT;
+        }
+
+        /* Scale and convert to linear scale */
+        gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */
+    }
+}
+
+/* Gains scalar dequantization, uniform on log scale */
+void silk_gains_dequant(
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* O    quantized gains                             */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                          */
+)
+{
+    opus_int   k, ind_tmp, double_step_size_threshold;
+
+    for( k = 0; k < nb_subfr; k++ ) {
+        if( k == 0 && conditional == 0 ) {
+            /* Gain index is not allowed to go down more than 16 steps (~21.8 dB) */
+            *prev_ind = silk_max_int( ind[ k ], *prev_ind - 16 );
+        } else {
+            /* Delta index */
+            ind_tmp = ind[ k ] + MIN_DELTA_GAIN_QUANT;
+
+            /* Accumulate deltas */
+            double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind;
+            if( ind_tmp > double_step_size_threshold ) {
+                *prev_ind += silk_LSHIFT( ind_tmp, 1 ) - double_step_size_threshold;
+            } else {
+                *prev_ind += ind_tmp;
+            }
+        }
+        *prev_ind = silk_LIMIT_int( *prev_ind, 0, N_LEVELS_QGAIN - 1 );
+
+        /* Scale and convert to linear scale */
+        gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */
+    }
+}
+
+/* Compute unique identifier of gain indices vector */
+opus_int32 silk_gains_ID(                                       /* O    returns unique identifier of gains          */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+)
+{
+    opus_int   k;
+    opus_int32 gainsID;
+
+    gainsID = 0;
+    for( k = 0; k < nb_subfr; k++ ) {
+        gainsID = silk_ADD_LSHIFT32( ind[ k ], gainsID, 8 );
+    }
+
+    return gainsID;
+}
diff --git a/drivers/opus/silk/init_decoder.c b/drivers/opus/silk/init_decoder.c
new file mode 100644
index 0000000000..88c1ff7b43
--- /dev/null
+++ b/drivers/opus/silk/init_decoder.c
@@ -0,0 +1,56 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/************************/
+/* Init Decoder State   */
+/************************/
+opus_int silk_init_decoder(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */
+)
+{
+    /* Clear the entire encoder state, except anything copied */
+    silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
+
+    /* Used to deactivate LSF interpolation */
+    psDec->first_frame_after_reset = 1;
+    psDec->prev_gain_Q16 = 65536;
+
+    /* Reset CNG state */
+    silk_CNG_Reset( psDec );
+
+    /* Reset PLC state */
+    silk_PLC_Reset( psDec );
+
+    return(0);
+}
+
diff --git a/drivers/opus/silk/init_encoder.c b/drivers/opus/silk/init_encoder.c
new file mode 100644
index 0000000000..baf97d49e7
--- /dev/null
+++ b/drivers/opus/silk/init_encoder.c
@@ -0,0 +1,64 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#else
+#include "main_FLP.h"
+#endif
+#include "tuning_parameters.h"
+#include "cpu_support.h"
+
+/*********************************/
+/* Initialize Silk Encoder state */
+/*********************************/
+opus_int silk_init_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
+    int                              arch                                   /* I    Run-time architecture                                                       */
+)
+{
+    opus_int ret = 0;
+
+    /* Clear the entire encoder state */
+    silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) );
+
+    psEnc->sCmn.arch = arch;
+
+    psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 );
+    psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15;
+
+    /* Used to deactivate LSF interpolation, pitch prediction */
+    psEnc->sCmn.first_frame_after_reset = 1;
+
+    /* Initialize Silk VAD */
+    ret += silk_VAD_Init( &psEnc->sCmn.sVAD );
+
+    return  ret;
+}
diff --git a/drivers/opus/silk/inner_prod_aligned.c b/drivers/opus/silk/inner_prod_aligned.c
new file mode 100644
index 0000000000..d625001db7
--- /dev/null
+++ b/drivers/opus/silk/inner_prod_aligned.c
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+opus_int32 silk_inner_prod_aligned_scale(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              scale,              /*    I number of bits to shift                                     */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int   i;
+    opus_int32 sum = 0;
+    for( i = 0; i < len; i++ ) {
+        sum = silk_ADD_RSHIFT32( sum, silk_SMULBB( inVec1[ i ], inVec2[ i ] ), scale );
+    }
+    return sum;
+}
diff --git a/drivers/opus/silk/interpolate.c b/drivers/opus/silk/interpolate.c
new file mode 100644
index 0000000000..d5df0feddb
--- /dev/null
+++ b/drivers/opus/silk/interpolate.c
@@ -0,0 +1,51 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Interpolate two vectors */
+void silk_interpolate(
+    opus_int16                  xi[ MAX_LPC_ORDER ],            /* O    interpolated vector                         */
+    const opus_int16            x0[ MAX_LPC_ORDER ],            /* I    first vector                                */
+    const opus_int16            x1[ MAX_LPC_ORDER ],            /* I    second vector                               */
+    const opus_int              ifact_Q2,                       /* I    interp. factor, weight on 2nd vector        */
+    const opus_int              d                               /* I    number of parameters                        */
+)
+{
+    opus_int i;
+
+    silk_assert( ifact_Q2 >= 0 );
+    silk_assert( ifact_Q2 <= 4 );
+
+    for( i = 0; i < d; i++ ) {
+        xi[ i ] = (opus_int16)silk_ADD_RSHIFT( x0[ i ], silk_SMULBB( x1[ i ] - x0[ i ], ifact_Q2 ), 2 );
+    }
+}
diff --git a/drivers/opus/silk/lin2log.c b/drivers/opus/silk/lin2log.c
new file mode 100644
index 0000000000..77bfc8c8ab
--- /dev/null
+++ b/drivers/opus/silk/lin2log.c
@@ -0,0 +1,46 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+/* Approximation of 128 * log2() (very close inverse of silk_log2lin()) */
+/* Convert input to a log scale    */
+opus_int32 silk_lin2log(
+    const opus_int32            inLin               /* I  input in linear scale                                         */
+)
+{
+    opus_int32 lz, frac_Q7;
+
+    silk_CLZ_FRAC( inLin, &lz, &frac_Q7 );
+
+    /* Piece-wise parabolic approximation */
+    return silk_LSHIFT( 31 - lz, 7 ) + silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 );
+}
+
diff --git a/drivers/opus/silk/log2lin.c b/drivers/opus/silk/log2lin.c
new file mode 100644
index 0000000000..0ed2a12efd
--- /dev/null
+++ b/drivers/opus/silk/log2lin.c
@@ -0,0 +1,58 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Approximation of 2^() (very close inverse of silk_lin2log()) */
+/* Convert input to a linear scale    */
+opus_int32 silk_log2lin( 
+    const opus_int32            inLog_Q7            /* I  input on log scale                                            */
+)
+{
+    opus_int32 out, frac_Q7;
+
+    if( inLog_Q7 < 0 ) {
+        return 0;
+    } else if ( inLog_Q7 >= 3967 ) {
+		return silk_int32_MAX;
+	}
+
+    out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) );
+    frac_Q7 = inLog_Q7 & 0x7F;
+    if( inLog_Q7 < 2048 ) {
+        /* Piece-wise parabolic approximation */
+        out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 );
+    } else {
+        /* Piece-wise parabolic approximation */
+        out = silk_MLA( out, silk_RSHIFT( out, 7 ), silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) );
+    }
+    return out;
+}
diff --git a/drivers/opus/silk/macros.h b/drivers/opus/silk/macros.h
new file mode 100644
index 0000000000..6cf2e93dbc
--- /dev/null
+++ b/drivers/opus/silk/macros.h
@@ -0,0 +1,113 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_H
+#define SILK_MACROS_H
+
+#include "opus_config.h"
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+/* This is an OPUS_INLINE header file for general platform. */
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
+#define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
+#define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#define silk_SMULWT(a32, b32)            (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
+
+/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
+#define silk_SMULBB(a32, b32)            ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
+#define silk_SMLABB(a32, b32, c32)       ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))
+
+/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
+#define silk_SMULBT(a32, b32)            ((opus_int32)((opus_int16)(a32)) * ((b32) >> 16))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
+#define silk_SMLABT(a32, b32, c32)       ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16))
+
+/* a64 + (b32 * c32) */
+#define silk_SMLAL(a64, b32, c32)        (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32))))
+
+/* (a32 * b32) >> 16 */
+#define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
+
+/* a32 + ((b32 * c32) >> 16) */
+#define silk_SMLAWW(a32, b32, c32)       silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
+
+/* add/subtract with output saturated */
+#define silk_ADD_SAT32(a, b)             ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ?                              \
+                                        ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) :   \
+                                        ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) )
+
+#define silk_SUB_SAT32(a, b)             ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ?                                        \
+                                        (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
+                                        ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
+
+#include "ecintrin.h"
+
+static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
+{
+    return 32 - EC_ILOG(in16<<16|0x8000);
+}
+
+static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
+{
+    return in32 ? 32 - EC_ILOG(in32) : 32;
+}
+
+/* Row based */
+#define matrix_ptr(Matrix_base_adr, row, column, N) \
+    (*((Matrix_base_adr) + ((row)*(N)+(column))))
+#define matrix_adr(Matrix_base_adr, row, column, N) \
+      ((Matrix_base_adr) + ((row)*(N)+(column)))
+
+/* Column based */
+#ifndef matrix_c_ptr
+#   define matrix_c_ptr(Matrix_base_adr, row, column, M) \
+    (*((Matrix_base_adr) + ((row)+(M)*(column))))
+#endif
+
+#ifdef OPUS_ARM_INLINE_ASM
+#include "arm/macros_armv4.h"
+#endif
+
+#ifdef OPUS_ARM_INLINE_EDSP
+#include "arm/macros_armv5e.h"
+#endif
+
+#endif /* SILK_MACROS_H */
+
diff --git a/drivers/opus/silk/pitch_est_defines.h b/drivers/opus/silk/pitch_est_defines.h
new file mode 100644
index 0000000000..e1e4b5d768
--- /dev/null
+++ b/drivers/opus/silk/pitch_est_defines.h
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_PE_DEFINES_H
+#define SILK_PE_DEFINES_H
+
+#include "SigProc_FIX.h"
+
+/********************************************************/
+/* Definitions for pitch estimator                      */
+/********************************************************/
+
+#define PE_MAX_FS_KHZ               16 /* Maximum sampling frequency used */
+
+#define PE_MAX_NB_SUBFR             4
+#define PE_SUBFR_LENGTH_MS          5   /* 5 ms */
+
+#define PE_LTP_MEM_LENGTH_MS        ( 4 * PE_SUBFR_LENGTH_MS )
+
+#define PE_MAX_FRAME_LENGTH_MS      ( PE_LTP_MEM_LENGTH_MS + PE_MAX_NB_SUBFR * PE_SUBFR_LENGTH_MS )
+#define PE_MAX_FRAME_LENGTH         ( PE_MAX_FRAME_LENGTH_MS * PE_MAX_FS_KHZ )
+#define PE_MAX_FRAME_LENGTH_ST_1    ( PE_MAX_FRAME_LENGTH >> 2 )
+#define PE_MAX_FRAME_LENGTH_ST_2    ( PE_MAX_FRAME_LENGTH >> 1 )
+
+#define PE_MAX_LAG_MS               18           /* 18 ms -> 56 Hz */
+#define PE_MIN_LAG_MS               2            /* 2 ms -> 500 Hz */
+#define PE_MAX_LAG                  ( PE_MAX_LAG_MS * PE_MAX_FS_KHZ )
+#define PE_MIN_LAG                  ( PE_MIN_LAG_MS * PE_MAX_FS_KHZ )
+
+#define PE_D_SRCH_LENGTH            24
+
+#define PE_NB_STAGE3_LAGS           5
+
+#define PE_NB_CBKS_STAGE2           3
+#define PE_NB_CBKS_STAGE2_EXT       11
+
+#define PE_NB_CBKS_STAGE3_MAX       34
+#define PE_NB_CBKS_STAGE3_MID       24
+#define PE_NB_CBKS_STAGE3_MIN       16
+
+#define PE_NB_CBKS_STAGE3_10MS      12
+#define PE_NB_CBKS_STAGE2_10MS      3
+
+#define PE_SHORTLAG_BIAS            0.2f    /* for logarithmic weighting    */
+#define PE_PREVLAG_BIAS             0.2f    /* for logarithmic weighting    */
+#define PE_FLATCONTOUR_BIAS         0.05f
+
+#define SILK_PE_MIN_COMPLEX         0
+#define SILK_PE_MID_COMPLEX         1
+#define SILK_PE_MAX_COMPLEX         2
+
+/* Tables for 20 ms frames */
+extern const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ];
+extern const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ];
+extern const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ];
+extern const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ];
+
+/* Tables for 10 ms frames */
+extern const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ 3 ];
+extern const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 12 ];
+extern const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ];
+
+#endif
+
diff --git a/drivers/opus/silk/pitch_est_tables.c b/drivers/opus/silk/pitch_est_tables.c
new file mode 100644
index 0000000000..97ddbab010
--- /dev/null
+++ b/drivers/opus/silk/pitch_est_tables.c
@@ -0,0 +1,99 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "typedef.h"
+#include "pitch_est_defines.h"
+
+const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ PE_NB_CBKS_STAGE2_10MS ] =
+{
+    {0, 1, 0},
+    {0, 0, 1}
+};
+
+const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE3_10MS ] =
+{
+    { 0, 0, 1,-1, 1,-1, 2,-2, 2,-2, 3,-3},
+    { 0, 1, 0, 1,-1, 2,-1, 2,-2, 3,-2, 3}
+};
+
+const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ] =
+{
+    {-3, 7},
+    {-2, 7}
+};
+
+const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ] =
+{
+    {0, 2,-1,-1,-1, 0, 0, 1, 1, 0, 1},
+    {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0},
+    {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0},
+    {0,-1, 2, 1, 0, 1, 1, 0, 0,-1,-1}
+};
+
+const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ] =
+{
+    {0, 0, 1,-1, 0, 1,-1, 0,-1, 1,-2, 2,-2,-2, 2,-3, 2, 3,-3,-4, 3,-4, 4, 4,-5, 5,-6,-5, 6,-7, 6, 5, 8,-9},
+    {0, 0, 1, 0, 0, 0, 0, 0, 0, 0,-1, 1, 0, 0, 1,-1, 0, 1,-1,-1, 1,-1, 2, 1,-1, 2,-2,-2, 2,-2, 2, 2, 3,-3},
+    {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,-1, 1, 0, 0, 2, 1,-1, 2,-1,-1, 2,-1, 2, 2,-1, 3,-2,-2,-2, 3},
+    {0, 1, 0, 0, 1, 0, 1,-1, 2,-1, 2,-1, 2, 3,-2, 3,-2,-2, 4, 4,-3, 5,-3,-4, 6,-4, 6, 5,-5, 8,-6,-5,-7, 9}
+};
+
+const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ] =
+{
+    /* Lags to search for low number of stage3 cbks */
+    {
+        {-5,8},
+        {-1,6},
+        {-1,6},
+        {-4,10}
+    },
+    /* Lags to search for middle number of stage3 cbks */
+    {
+        {-6,10},
+        {-2,6},
+        {-1,6},
+        {-5,10}
+    },
+    /* Lags to search for max number of stage3 cbks */
+    {
+        {-9,12},
+        {-3,7},
+        {-2,7},
+        {-7,13}
+    }
+};
+
+const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ] =
+{
+    PE_NB_CBKS_STAGE3_MIN,
+    PE_NB_CBKS_STAGE3_MID,
+    PE_NB_CBKS_STAGE3_MAX
+};
diff --git a/drivers/opus/silk/process_NLSFs.c b/drivers/opus/silk/process_NLSFs.c
new file mode 100644
index 0000000000..0193fda1f1
--- /dev/null
+++ b/drivers/opus/silk/process_NLSFs.c
@@ -0,0 +1,105 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Limit, stabilize, convert and quantize NLSFs */
+void silk_process_NLSFs(
+    silk_encoder_state          *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                  PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O    Prediction coefficients                     */
+    opus_int16                  pNLSF_Q15[         MAX_LPC_ORDER ], /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16            prev_NLSFq_Q15[    MAX_LPC_ORDER ]  /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+)
+{
+    opus_int     i, doInterpolate;
+    opus_int     NLSF_mu_Q20;
+    opus_int32   i_sqr_Q15;
+    opus_int16   pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
+    opus_int16   pNLSFW_QW[ MAX_LPC_ORDER ];
+    opus_int16   pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
+
+    silk_assert( psEncC->speech_activity_Q8 >=   0 );
+    silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) );
+    silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) );
+
+    /***********************/
+    /* Calculate mu values */
+    /***********************/
+    /* NLSF_mu  = 0.003 - 0.0015 * psEnc->speech_activity; */
+    NLSF_mu_Q20 = silk_SMLAWB( SILK_FIX_CONST( 0.003, 20 ), SILK_FIX_CONST( -0.001, 28 ), psEncC->speech_activity_Q8 );
+    if( psEncC->nb_subfr == 2 ) {
+        /* Multiply by 1.5 for 10 ms packets */
+        NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 );
+    }
+
+    silk_assert( NLSF_mu_Q20 >  0 );
+    silk_assert( NLSF_mu_Q20 <= SILK_FIX_CONST( 0.005, 20 ) );
+
+    /* Calculate NLSF weights */
+    silk_NLSF_VQ_weights_laroia( pNLSFW_QW, pNLSF_Q15, psEncC->predictLPCOrder );
+
+    /* Update NLSF weights for interpolated NLSFs */
+    doInterpolate = ( psEncC->useInterpolatedNLSFs == 1 ) && ( psEncC->indices.NLSFInterpCoef_Q2 < 4 );
+    if( doInterpolate ) {
+        /* Calculate the interpolated NLSF vector for the first half */
+        silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
+            psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
+
+        /* Calculate first half NLSF weights for the interpolated NLSFs */
+        silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder );
+
+        /* Update NLSF weights with contribution from first half */
+        i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 );
+        for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
+            pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 );
+            silk_assert( pNLSFW_QW[ i ] >= 1 );
+        }
+    }
+
+    silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW,
+        NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType );
+
+    /* Convert quantized NLSFs back to LPC coefficients */
+    silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder );
+
+    if( doInterpolate ) {
+        /* Calculate the interpolated, quantized LSF vector for the first half */
+        silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15,
+            psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder );
+
+        /* Convert back to LPC coefficients */
+        silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder );
+
+    } else {
+        /* Copy LPC coefficients for first half from second half */
+        silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
+    }
+}
diff --git a/drivers/opus/silk/quant_LTP_gains.c b/drivers/opus/silk/quant_LTP_gains.c
new file mode 100644
index 0000000000..34bcd3acdb
--- /dev/null
+++ b/drivers/opus/silk/quant_LTP_gains.c
@@ -0,0 +1,128 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "tuning_parameters.h"
+
+void silk_quant_LTP_gains(
+    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
+    opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
+    opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+	opus_int32					*sum_log_gain_Q7,							/* I/O  Cumulative max prediction gain  */
+    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
+    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
+    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
+    const opus_int              nb_subfr                                    /* I    number of subframes             */
+)
+{
+    opus_int             j, k, cbk_size;
+    opus_int8            temp_idx[ MAX_NB_SUBFR ];
+    const opus_uint8     *cl_ptr_Q5;
+    const opus_int8      *cbk_ptr_Q7;
+    const opus_uint8     *cbk_gain_ptr_Q7;
+    const opus_int16     *b_Q14_ptr;
+    const opus_int32     *W_Q18_ptr;
+    opus_int32           rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;
+	opus_int32           sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;
+
+    /***************************************************/
+    /* iterate over different codebooks with different */
+    /* rates/distortions, and choose best */
+    /***************************************************/
+    min_rate_dist_Q14 = silk_int32_MAX;
+    best_sum_log_gain_Q7 = 0;
+    for( k = 0; k < 3; k++ ) {
+        /* Safety margin for pitch gain control, to take into account factors
+           such as state rescaling/rewhitening. */
+        opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 );
+
+        cl_ptr_Q5  = silk_LTP_gain_BITS_Q5_ptrs[ k ];
+        cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[        k ];
+        cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ];
+        cbk_size   = silk_LTP_vq_sizes[          k ];
+
+        /* Set up pointer to first subframe */
+        W_Q18_ptr = W_Q18;
+        b_Q14_ptr = B_Q14;
+
+        rate_dist_Q14 = 0;
+		sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;
+        for( j = 0; j < nb_subfr; j++ ) {
+			max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) 
+										+ SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
+
+            silk_VQ_WMat_EC(
+                &temp_idx[ j ],         /* O    index of best codebook vector                           */
+                &rate_dist_Q14_subfr,   /* O    best weighted quantization error + mu * rate            */
+				&gain_Q7,               /* O    sum of absolute LTP coefficients                        */
+                b_Q14_ptr,              /* I    input vector to be quantized                            */
+                W_Q18_ptr,              /* I    weighting matrix                                        */
+                cbk_ptr_Q7,             /* I    codebook                                                */
+                cbk_gain_ptr_Q7,        /* I    codebook effective gains                                */
+                cl_ptr_Q5,              /* I    code length for each codebook vector                    */
+                mu_Q9,                  /* I    tradeoff between weighted error and rate                */
+				max_gain_Q7,            /* I    maximum sum of absolute LTP coefficients                */
+                cbk_size                /* I    number of vectors in codebook                           */
+            );
+
+            rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );
+            sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7
+                                + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 ));
+
+            b_Q14_ptr += LTP_ORDER;
+            W_Q18_ptr += LTP_ORDER * LTP_ORDER;
+        }
+
+        /* Avoid never finding a codebook */
+        rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 );
+
+        if( rate_dist_Q14 < min_rate_dist_Q14 ) {
+            min_rate_dist_Q14 = rate_dist_Q14;
+            *periodicity_index = (opus_int8)k;
+            silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );
+			best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
+        }
+
+        /* Break early in low-complexity mode if rate distortion is below threshold */
+        if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) {
+            break;
+        }
+    }
+
+    cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ];
+    for( j = 0; j < nb_subfr; j++ ) {
+        for( k = 0; k < LTP_ORDER; k++ ) {
+            B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 );
+        }
+    }
+	*sum_log_gain_Q7 = best_sum_log_gain_Q7;
+}
+
diff --git a/drivers/opus/silk/resampler.c b/drivers/opus/silk/resampler.c
new file mode 100644
index 0000000000..14b185c45e
--- /dev/null
+++ b/drivers/opus/silk/resampler.c
@@ -0,0 +1,215 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/*
+ * Matrix of resampling methods used:
+ *                                 Fs_out (kHz)
+ *                        8      12     16     24     48
+ *
+ *               8        C      UF     U      UF     UF
+ *              12        AF     C      UF     U      UF
+ * Fs_in (kHz)  16        D      AF     C      UF     UF
+ *              24        AF     D      AF     C      U
+ *              48        AF     AF     AF     D      C
+ *
+ * C   -> Copy (no resampling)
+ * D   -> Allpass-based 2x downsampling
+ * U   -> Allpass-based 2x upsampling
+ * UF  -> Allpass-based 2x upsampling followed by FIR interpolation
+ * AF  -> AR2 filter followed by FIR interpolation
+ */
+
+#include "resampler_private.h"
+
+/* Tables with delay compensation values to equalize total delay for different modes */
+static const opus_int8 delay_matrix_enc[ 5 ][ 3 ] = {
+/* in  \ out  8  12  16 */
+/*  8 */   {  6,  0,  3 },
+/* 12 */   {  0,  7,  3 },
+/* 16 */   {  0,  1, 10 },
+/* 24 */   {  0,  2,  6 },
+/* 48 */   { 18, 10, 12 }
+};
+
+static const opus_int8 delay_matrix_dec[ 3 ][ 5 ] = {
+/* in  \ out  8  12  16  24  48 */
+/*  8 */   {  4,  0,  2,  0,  0 },
+/* 12 */   {  0,  9,  4,  7,  4 },
+/* 16 */   {  0,  3, 12,  7,  7 }
+};
+
+/* Simple way to make [8000, 12000, 16000, 24000, 48000] to [0, 1, 2, 3, 4] */
+#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 )
+
+#define USE_silk_resampler_copy                     (0)
+#define USE_silk_resampler_private_up2_HQ_wrapper   (1)
+#define USE_silk_resampler_private_IIR_FIR          (2)
+#define USE_silk_resampler_private_down_FIR         (3)
+
+/* Initialize/reset the resampler state for a given pair of input/output sampling rates */
+opus_int silk_resampler_init(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int32                  Fs_Hz_in,           /* I    Input sampling rate (Hz)                                    */
+    opus_int32                  Fs_Hz_out,          /* I    Output sampling rate (Hz)                                   */
+    opus_int                    forEnc              /* I    If 1: encoder; if 0: decoder                                */
+)
+{
+    opus_int up2x;
+
+    /* Clear state */
+    silk_memset( S, 0, sizeof( silk_resampler_state_struct ) );
+
+    /* Input checking */
+    if( forEnc ) {
+        if( ( Fs_Hz_in  != 8000 && Fs_Hz_in  != 12000 && Fs_Hz_in  != 16000 && Fs_Hz_in  != 24000 && Fs_Hz_in  != 48000 ) ||
+            ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) {
+            silk_assert( 0 );
+            return -1;
+        }
+        S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
+    } else {
+        if( ( Fs_Hz_in  != 8000 && Fs_Hz_in  != 12000 && Fs_Hz_in  != 16000 ) ||
+            ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) {
+            silk_assert( 0 );
+            return -1;
+        }
+        S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
+    }
+
+    S->Fs_in_kHz  = silk_DIV32_16( Fs_Hz_in,  1000 );
+    S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 );
+
+    /* Number of samples processed per batch */
+    S->batchSize = S->Fs_in_kHz * RESAMPLER_MAX_BATCH_SIZE_MS;
+
+    /* Find resampler with the right sampling ratio */
+    up2x = 0;
+    if( Fs_Hz_out > Fs_Hz_in ) {
+        /* Upsample */
+        if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) {                            /* Fs_out : Fs_in = 2 : 1 */
+            /* Special case: directly use 2x upsampler */
+            S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper;
+        } else {
+            /* Default resampler */
+            S->resampler_function = USE_silk_resampler_private_IIR_FIR;
+            up2x = 1;
+        }
+    } else if ( Fs_Hz_out < Fs_Hz_in ) {
+        /* Downsample */
+         S->resampler_function = USE_silk_resampler_private_down_FIR;
+        if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) {             /* Fs_out : Fs_in = 3 : 4 */
+            S->FIR_Fracs = 3;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
+            S->Coefs = silk_Resampler_3_4_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) {      /* Fs_out : Fs_in = 2 : 3 */
+            S->FIR_Fracs = 2;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
+            S->Coefs = silk_Resampler_2_3_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 2 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR1;
+            S->Coefs = silk_Resampler_1_2_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 3 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+            S->Coefs = silk_Resampler_1_3_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 4 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+            S->Coefs = silk_Resampler_1_4_COEFS;
+        } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) {                     /* Fs_out : Fs_in = 1 : 6 */
+            S->FIR_Fracs = 1;
+            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
+            S->Coefs = silk_Resampler_1_6_COEFS;
+        } else {
+            /* None available */
+            silk_assert( 0 );
+            return -1;
+        }
+    } else {
+        /* Input and output sampling rates are equal: copy */
+        S->resampler_function = USE_silk_resampler_copy;
+    }
+
+    /* Ratio of input/output samples */
+    S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + up2x ), Fs_Hz_out ), 2 );
+    /* Make sure the ratio is rounded up */
+    while( silk_SMULWW( S->invRatio_Q16, Fs_Hz_out ) < silk_LSHIFT32( Fs_Hz_in, up2x ) ) {
+        S->invRatio_Q16++;
+    }
+
+    return 0;
+}
+
+/* Resampler: convert from one sampling rate to another */
+/* Input and output sampling rate are at most 48000 Hz  */
+opus_int silk_resampler(
+    silk_resampler_state_struct *S,                 /* I/O  Resampler state                                             */
+    opus_int16                  out[],              /* O    Output signal                                               */
+    const opus_int16            in[],               /* I    Input signal                                                */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+)
+{
+    opus_int nSamples;
+
+    /* Need at least 1 ms of input data */
+    silk_assert( inLen >= S->Fs_in_kHz );
+    /* Delay can't exceed the 1 ms of buffering */
+    silk_assert( S->inputDelay <= S->Fs_in_kHz );
+
+    nSamples = S->Fs_in_kHz - S->inputDelay;
+
+    /* Copy to delay buffer */
+    silk_memcpy( &S->delayBuf[ S->inputDelay ], in, nSamples * sizeof( opus_int16 ) );
+
+    switch( S->resampler_function ) {
+        case USE_silk_resampler_private_up2_HQ_wrapper:
+            silk_resampler_private_up2_HQ_wrapper( S, out, S->delayBuf, S->Fs_in_kHz );
+            silk_resampler_private_up2_HQ_wrapper( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+            break;
+        case USE_silk_resampler_private_IIR_FIR:
+            silk_resampler_private_IIR_FIR( S, out, S->delayBuf, S->Fs_in_kHz );
+            silk_resampler_private_IIR_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+            break;
+        case USE_silk_resampler_private_down_FIR:
+            silk_resampler_private_down_FIR( S, out, S->delayBuf, S->Fs_in_kHz );
+            silk_resampler_private_down_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz );
+            break;
+        default:
+            silk_memcpy( out, S->delayBuf, S->Fs_in_kHz * sizeof( opus_int16 ) );
+            silk_memcpy( &out[ S->Fs_out_kHz ], &in[ nSamples ], ( inLen - S->Fs_in_kHz ) * sizeof( opus_int16 ) );
+    }
+
+    /* Copy to delay buffer */
+    silk_memcpy( S->delayBuf, &in[ inLen - S->inputDelay ], S->inputDelay * sizeof( opus_int16 ) );
+
+    return 0;
+}
diff --git a/drivers/opus/silk/resampler_down2.c b/drivers/opus/silk/resampler_down2.c
new file mode 100644
index 0000000000..5c4b27759a
--- /dev/null
+++ b/drivers/opus/silk/resampler_down2.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_rom.h"
+
+/* Downsample by a factor 2 */
+void silk_resampler_down2(
+    opus_int32                  *S,                 /* I/O  State vector [ 2 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ floor(len/2) ]                              */
+    const opus_int16            *in,                /* I    Input signal [ len ]                                        */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+)
+{
+    opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 );
+    opus_int32 in32, out32, Y, X;
+
+    silk_assert( silk_resampler_down2_0 > 0 );
+    silk_assert( silk_resampler_down2_1 < 0 );
+
+    /* Internal variables and state are in Q10 format */
+    for( k = 0; k < len2; k++ ) {
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 );
+
+        /* All-pass section for even input sample */
+        Y      = silk_SUB32( in32, S[ 0 ] );
+        X      = silk_SMLAWB( Y, Y, silk_resampler_down2_1 );
+        out32  = silk_ADD32( S[ 0 ], X );
+        S[ 0 ] = silk_ADD32( in32, X );
+
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 );
+
+        /* All-pass section for odd input sample, and add to output of previous section */
+        Y      = silk_SUB32( in32, S[ 1 ] );
+        X      = silk_SMULWB( Y, silk_resampler_down2_0 );
+        out32  = silk_ADD32( out32, S[ 1 ] );
+        out32  = silk_ADD32( out32, X );
+        S[ 1 ] = silk_ADD32( in32, X );
+
+        /* Add, convert back to int16 and store to output */
+        out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32, 11 ) );
+    }
+}
+
diff --git a/drivers/opus/silk/resampler_down2_3.c b/drivers/opus/silk/resampler_down2_3.c
new file mode 100644
index 0000000000..2733072fe6
--- /dev/null
+++ b/drivers/opus/silk/resampler_down2_3.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+#define ORDER_FIR                   4
+
+/* Downsample by a factor 2/3, low quality */
+void silk_resampler_down2_3(
+    opus_int32                  *S,                 /* I/O  State vector [ 6 ]                                          */
+    opus_int16                  *out,               /* O    Output signal [ floor(2*inLen/3) ]                          */
+    const opus_int16            *in,                /* I    Input signal [ inLen ]                                      */
+    opus_int32                  inLen               /* I    Number of input samples                                     */
+)
+{
+    opus_int32 nSamplesIn, counter, res_Q6;
+    VARDECL( opus_int32, buf );
+    opus_int32 *buf_ptr;
+    SAVE_STACK;
+
+    ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 );
+
+    /* Copy buffered samples to start of buffer */
+    silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) );
+
+    /* Iterate over blocks of frameSizeIn input samples */
+    while( 1 ) {
+        nSamplesIn = silk_min( inLen, RESAMPLER_MAX_BATCH_SIZE_IN );
+
+        /* Second-order AR filter (output in Q8) */
+        silk_resampler_private_AR2( &S[ ORDER_FIR ], &buf[ ORDER_FIR ], in,
+            silk_Resampler_2_3_COEFS_LQ, nSamplesIn );
+
+        /* Interpolate filtered signal */
+        buf_ptr = buf;
+        counter = nSamplesIn;
+        while( counter > 2 ) {
+            /* Inner product */
+            res_Q6 = silk_SMULWB(         buf_ptr[ 0 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] );
+
+            /* Scale down, saturate and store in output array */
+            *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+
+            res_Q6 = silk_SMULWB(         buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] );
+            res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] );
+
+            /* Scale down, saturate and store in output array */
+            *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+
+            buf_ptr += 3;
+            counter -= 3;
+        }
+
+        in += nSamplesIn;
+        inLen -= nSamplesIn;
+
+        if( inLen > 0 ) {
+            /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+            silk_memcpy( buf, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );
+        } else {
+            break;
+        }
+    }
+
+    /* Copy last part of filtered signal to the state for the next call */
+    silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/resampler_private.h b/drivers/opus/silk/resampler_private.h
new file mode 100644
index 0000000000..422a7d9d95
--- /dev/null
+++ b/drivers/opus/silk/resampler_private.h
@@ -0,0 +1,88 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_RESAMPLER_PRIVATE_H
+#define SILK_RESAMPLER_PRIVATE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_structs.h"
+#include "resampler_rom.h"
+
+/* Number of input samples to process in the inner loop */
+#define RESAMPLER_MAX_BATCH_SIZE_MS             10
+#define RESAMPLER_MAX_FS_KHZ                    48
+#define RESAMPLER_MAX_BATCH_SIZE_IN             ( RESAMPLER_MAX_BATCH_SIZE_MS * RESAMPLER_MAX_FS_KHZ )
+
+/* Description: Hybrid IIR/FIR polyphase implementation of resampling */
+void silk_resampler_private_IIR_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+);
+
+/* Description: Hybrid IIR/FIR polyphase implementation of resampling */
+void silk_resampler_private_down_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+);
+
+/* Upsample by a factor 2, high quality */
+void silk_resampler_private_up2_HQ_wrapper(
+    void                            *SS,            /* I/O  Resampler state (unused)    */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+);
+
+/* Upsample by a factor 2, high quality */
+void silk_resampler_private_up2_HQ(
+    opus_int32                      *S,             /* I/O  Resampler state [ 6 ]       */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+);
+
+/* Second order AR filter */
+void silk_resampler_private_AR2(
+    opus_int32                      S[],            /* I/O  State vector [ 2 ]          */
+    opus_int32                      out_Q8[],       /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    const opus_int16                A_Q14[],        /* I    AR coefficients, Q14        */
+    opus_int32                      len             /* I    Signal length               */
+);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SILK_RESAMPLER_PRIVATE_H */
diff --git a/drivers/opus/silk/resampler_private_AR2.c b/drivers/opus/silk/resampler_private_AR2.c
new file mode 100644
index 0000000000..84157d17ba
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_AR2.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+
+/* Second order AR filter with single delay elements */
+void silk_resampler_private_AR2(
+    opus_int32                      S[],            /* I/O  State vector [ 2 ]          */
+    opus_int32                      out_Q8[],       /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    const opus_int16                A_Q14[],        /* I    AR coefficients, Q14        */
+    opus_int32                      len             /* I    Signal length               */
+)
+{
+    opus_int32    k;
+    opus_int32    out32;
+
+    for( k = 0; k < len; k++ ) {
+        out32       = silk_ADD_LSHIFT32( S[ 0 ], (opus_int32)in[ k ], 8 );
+        out_Q8[ k ] = out32;
+        out32       = silk_LSHIFT( out32, 2 );
+        S[ 0 ]      = silk_SMLAWB( S[ 1 ], out32, A_Q14[ 0 ] );
+        S[ 1 ]      = silk_SMULWB( out32, A_Q14[ 1 ] );
+    }
+}
+
diff --git a/drivers/opus/silk/resampler_private_IIR_FIR.c b/drivers/opus/silk/resampler_private_IIR_FIR.c
new file mode 100644
index 0000000000..f45c3e7413
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_IIR_FIR.c
@@ -0,0 +1,107 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
+    opus_int16  *out,
+    opus_int16  *buf,
+    opus_int32  max_index_Q16,
+    opus_int32  index_increment_Q16
+)
+{
+    opus_int32 index_Q16, res_Q15;
+    opus_int16 *buf_ptr;
+    opus_int32 table_index;
+
+    /* Interpolate upsampled signal and store in output array */
+    for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+        table_index = silk_SMULWB( index_Q16 & 0xFFFF, 12 );
+        buf_ptr = &buf[ index_Q16 >> 16 ];
+
+        res_Q15 = silk_SMULBB(          buf_ptr[ 0 ], silk_resampler_frac_FIR_12[      table_index ][ 0 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 1 ], silk_resampler_frac_FIR_12[      table_index ][ 1 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 2 ], silk_resampler_frac_FIR_12[      table_index ][ 2 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 3 ], silk_resampler_frac_FIR_12[      table_index ][ 3 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 4 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 3 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 5 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 2 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 6 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 1 ] );
+        res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 7 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 0 ] );
+        *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q15, 15 ) );
+    }
+    return out;
+}
+/* Upsample using a combination of allpass-based 2x upsampling and FIR interpolation */
+void silk_resampler_private_IIR_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+)
+{
+    silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
+    opus_int32 nSamplesIn;
+    opus_int32 max_index_Q16, index_increment_Q16;
+    VARDECL( opus_int16, buf );
+    SAVE_STACK;
+
+    ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 );
+
+    /* Copy buffered samples to start of buffer */
+    silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+
+    /* Iterate over blocks of frameSizeIn input samples */
+    index_increment_Q16 = S->invRatio_Q16;
+    while( 1 ) {
+        nSamplesIn = silk_min( inLen, S->batchSize );
+
+        /* Upsample 2x */
+        silk_resampler_private_up2_HQ( S->sIIR, &buf[ RESAMPLER_ORDER_FIR_12 ], in, nSamplesIn );
+
+        max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 + 1 );         /* + 1 because 2x upsampling */
+        out = silk_resampler_private_IIR_FIR_INTERPOL( out, buf, max_index_Q16, index_increment_Q16 );
+        in += nSamplesIn;
+        inLen -= nSamplesIn;
+
+        if( inLen > 0 ) {
+            /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+            silk_memcpy( buf, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+        } else {
+            break;
+        }
+    }
+
+    /* Copy last part of filtered signal to the state for the next call */
+    silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/resampler_private_down_FIR.c b/drivers/opus/silk/resampler_private_down_FIR.c
new file mode 100644
index 0000000000..f4de303546
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_down_FIR.c
@@ -0,0 +1,194 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
+    opus_int16          *out,
+    opus_int32          *buf,
+    const opus_int16    *FIR_Coefs,
+    opus_int            FIR_Order,
+    opus_int            FIR_Fracs,
+    opus_int32          max_index_Q16,
+    opus_int32          index_increment_Q16
+)
+{
+    opus_int32 index_Q16, res_Q6;
+    opus_int32 *buf_ptr;
+    opus_int32 interpol_ind;
+    const opus_int16 *interpol_ptr;
+
+    switch( FIR_Order ) {
+        case RESAMPLER_DOWN_ORDER_FIR0:
+            for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+                /* Integer part gives pointer to buffered input */
+                buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+                /* Fractional part gives interpolation coefficients */
+                interpol_ind = silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs );
+
+                /* Inner product */
+                interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * interpol_ind ];
+                res_Q6 = silk_SMULWB(         buf_ptr[ 0 ], interpol_ptr[ 0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 6 ], interpol_ptr[ 6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 7 ], interpol_ptr[ 7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 8 ], interpol_ptr[ 8 ] );
+                interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * ( FIR_Fracs - 1 - interpol_ind ) ];
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 17 ], interpol_ptr[ 0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 16 ], interpol_ptr[ 1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 15 ], interpol_ptr[ 2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 14 ], interpol_ptr[ 3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 13 ], interpol_ptr[ 4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 12 ], interpol_ptr[ 5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[  9 ], interpol_ptr[ 8 ] );
+
+                /* Scale down, saturate and store in output array */
+                *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+            }
+            break;
+        case RESAMPLER_DOWN_ORDER_FIR1:
+            for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+                /* Integer part gives pointer to buffered input */
+                buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+                /* Inner product */
+                res_Q6 = silk_SMULWB(         silk_ADD32( buf_ptr[  0 ], buf_ptr[ 23 ] ), FIR_Coefs[  0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  1 ], buf_ptr[ 22 ] ), FIR_Coefs[  1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  2 ], buf_ptr[ 21 ] ), FIR_Coefs[  2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  3 ], buf_ptr[ 20 ] ), FIR_Coefs[  3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  4 ], buf_ptr[ 19 ] ), FIR_Coefs[  4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  5 ], buf_ptr[ 18 ] ), FIR_Coefs[  5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  6 ], buf_ptr[ 17 ] ), FIR_Coefs[  6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  7 ], buf_ptr[ 16 ] ), FIR_Coefs[  7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  8 ], buf_ptr[ 15 ] ), FIR_Coefs[  8 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  9 ], buf_ptr[ 14 ] ), FIR_Coefs[  9 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 13 ] ), FIR_Coefs[ 10 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 12 ] ), FIR_Coefs[ 11 ] );
+
+                /* Scale down, saturate and store in output array */
+                *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+            }
+            break;
+        case RESAMPLER_DOWN_ORDER_FIR2:
+            for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
+                /* Integer part gives pointer to buffered input */
+                buf_ptr = buf + silk_RSHIFT( index_Q16, 16 );
+
+                /* Inner product */
+                res_Q6 = silk_SMULWB(         silk_ADD32( buf_ptr[  0 ], buf_ptr[ 35 ] ), FIR_Coefs[  0 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  1 ], buf_ptr[ 34 ] ), FIR_Coefs[  1 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  2 ], buf_ptr[ 33 ] ), FIR_Coefs[  2 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  3 ], buf_ptr[ 32 ] ), FIR_Coefs[  3 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  4 ], buf_ptr[ 31 ] ), FIR_Coefs[  4 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  5 ], buf_ptr[ 30 ] ), FIR_Coefs[  5 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  6 ], buf_ptr[ 29 ] ), FIR_Coefs[  6 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  7 ], buf_ptr[ 28 ] ), FIR_Coefs[  7 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  8 ], buf_ptr[ 27 ] ), FIR_Coefs[  8 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[  9 ], buf_ptr[ 26 ] ), FIR_Coefs[  9 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 25 ] ), FIR_Coefs[ 10 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 24 ] ), FIR_Coefs[ 11 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 12 ], buf_ptr[ 23 ] ), FIR_Coefs[ 12 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 13 ], buf_ptr[ 22 ] ), FIR_Coefs[ 13 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 14 ], buf_ptr[ 21 ] ), FIR_Coefs[ 14 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 15 ], buf_ptr[ 20 ] ), FIR_Coefs[ 15 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] );
+                res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] );
+
+                /* Scale down, saturate and store in output array */
+                *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) );
+            }
+            break;
+        default:
+            silk_assert( 0 );
+    }
+    return out;
+}
+
+/* Resample with a 2nd order AR filter followed by FIR interpolation */
+void silk_resampler_private_down_FIR(
+    void                            *SS,            /* I/O  Resampler state             */
+    opus_int16                      out[],          /* O    Output signal               */
+    const opus_int16                in[],           /* I    Input signal                */
+    opus_int32                      inLen           /* I    Number of input samples     */
+)
+{
+    silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
+    opus_int32 nSamplesIn;
+    opus_int32 max_index_Q16, index_increment_Q16;
+    VARDECL( opus_int32, buf );
+    const opus_int16 *FIR_Coefs;
+    SAVE_STACK;
+
+    ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 );
+
+    /* Copy buffered samples to start of buffer */
+    silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) );
+
+    FIR_Coefs = &S->Coefs[ 2 ];
+
+    /* Iterate over blocks of frameSizeIn input samples */
+    index_increment_Q16 = S->invRatio_Q16;
+    while( 1 ) {
+        nSamplesIn = silk_min( inLen, S->batchSize );
+
+        /* Second-order AR filter (output in Q8) */
+        silk_resampler_private_AR2( S->sIIR, &buf[ S->FIR_Order ], in, S->Coefs, nSamplesIn );
+
+        max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 );
+
+        /* Interpolate filtered signal */
+        out = silk_resampler_private_down_FIR_INTERPOL( out, buf, FIR_Coefs, S->FIR_Order,
+            S->FIR_Fracs, max_index_Q16, index_increment_Q16 );
+
+        in += nSamplesIn;
+        inLen -= nSamplesIn;
+
+        if( inLen > 1 ) {
+            /* More iterations to do; copy last part of filtered signal to beginning of buffer */
+            silk_memcpy( buf, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );
+        } else {
+            break;
+        }
+    }
+
+    /* Copy last part of filtered signal to the state for the next call */
+    silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/resampler_private_up2_HQ.c b/drivers/opus/silk/resampler_private_up2_HQ.c
new file mode 100644
index 0000000000..39f4818454
--- /dev/null
+++ b/drivers/opus/silk/resampler_private_up2_HQ.c
@@ -0,0 +1,113 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+#include "resampler_private.h"
+
+/* Upsample by a factor 2, high quality */
+/* Uses 2nd order allpass filters for the 2x upsampling, followed by a      */
+/* notch filter just above Nyquist.                                         */
+void silk_resampler_private_up2_HQ(
+    opus_int32                      *S,             /* I/O  Resampler state [ 6 ]       */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+)
+{
+    opus_int32 k;
+    opus_int32 in32, out32_1, out32_2, Y, X;
+
+    silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 );
+    silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 );
+    silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 );
+
+    /* Internal variables and state are in Q10 format */
+    for( k = 0; k < len; k++ ) {
+        /* Convert to Q10 */
+        in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 );
+
+        /* First all-pass section for even output sample */
+        Y       = silk_SUB32( in32, S[ 0 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] );
+        out32_1 = silk_ADD32( S[ 0 ], X );
+        S[ 0 ]  = silk_ADD32( in32, X );
+
+        /* Second all-pass section for even output sample */
+        Y       = silk_SUB32( out32_1, S[ 1 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] );
+        out32_2 = silk_ADD32( S[ 1 ], X );
+        S[ 1 ]  = silk_ADD32( out32_1, X );
+
+        /* Third all-pass section for even output sample */
+        Y       = silk_SUB32( out32_2, S[ 2 ] );
+        X       = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] );
+        out32_1 = silk_ADD32( S[ 2 ], X );
+        S[ 2 ]  = silk_ADD32( out32_2, X );
+
+        /* Apply gain in Q15, convert back to int16 and store to output */
+        out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );
+
+        /* First all-pass section for odd output sample */
+        Y       = silk_SUB32( in32, S[ 3 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] );
+        out32_1 = silk_ADD32( S[ 3 ], X );
+        S[ 3 ]  = silk_ADD32( in32, X );
+
+        /* Second all-pass section for odd output sample */
+        Y       = silk_SUB32( out32_1, S[ 4 ] );
+        X       = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] );
+        out32_2 = silk_ADD32( S[ 4 ], X );
+        S[ 4 ]  = silk_ADD32( out32_1, X );
+
+        /* Third all-pass section for odd output sample */
+        Y       = silk_SUB32( out32_2, S[ 5 ] );
+        X       = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] );
+        out32_1 = silk_ADD32( S[ 5 ], X );
+        S[ 5 ]  = silk_ADD32( out32_2, X );
+
+        /* Apply gain in Q15, convert back to int16 and store to output */
+        out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );
+    }
+}
+
+void silk_resampler_private_up2_HQ_wrapper(
+    void                            *SS,            /* I/O  Resampler state (unused)    */
+    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
+    const opus_int16                *in,            /* I    Input signal [ len ]        */
+    opus_int32                      len             /* I    Number of input samples     */
+)
+{
+    silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
+    silk_resampler_private_up2_HQ( S->sIIR, out, in, len );
+}
diff --git a/drivers/opus/silk/resampler_rom.c b/drivers/opus/silk/resampler_rom.c
new file mode 100644
index 0000000000..0098e18ba8
--- /dev/null
+++ b/drivers/opus/silk/resampler_rom.c
@@ -0,0 +1,96 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Filter coefficients for IIR/FIR polyphase resampling     *
+ * Total size: 179 Words (358 Bytes)                        */
+
+#include "resampler_private.h"
+
+/* Matlab code for the notch filter coefficients: */
+/* B = [1, 0.147, 1];  A = [1, 0.107, 0.89]; G = 0.93; freqz(G * B, A, 2^14, 16e3); axis([0, 8000, -10, 1]) */
+/* fprintf('\t%6d, %6d, %6d, %6d\n', round(B(2)*2^16), round(-A(2)*2^16), round((1-A(3))*2^16), round(G*2^15)) */
+/* const opus_int16 silk_resampler_up2_hq_notch[ 4 ] = { 9634,  -7012,   7209,  30474 }; */
+
+/* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
+	-20694, -13867,
+	   -49,     64,     17,   -157,    353,   -496,    163,  11047,  22205,
+	   -39,      6,     91,   -170,    186,     23,   -896,   6336,  19928,
+	   -19,    -36,    102,    -89,    -24,    328,   -951,   2568,  15909,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = {
+	-14457, -14019,
+	    64,    128,   -122,     36,    310,   -768,    584,   9267,  17733,
+	    12,    128,     18,   -142,    288,   -117,   -865,   4123,  14459,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = {
+	   616, -14323,
+	   -10,     39,     58,    -46,    -84,    120,    184,   -315,   -541,   1284,   5380,   9024,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+	 16102, -15162,
+	   -13,      0,     20,     26,      5,    -31,    -43,     -4,     65,     90,      7,   -157,   -248,    -44,    593,   1583,   2612,   3271,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+	 22500, -15099,
+	     3,    -14,    -20,    -15,      2,     25,     37,     25,    -16,    -71,   -107,    -79,     50,    292,    623,    982,   1288,   1464,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = {
+	 27540, -15257,
+	    17,     12,      8,      1,    -10,    -22,    -30,    -32,    -22,      3,     44,    100,    168,    243,    317,    381,    429,    455,
+};
+
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = {
+     -2797,  -6507,
+      4697,  10739,
+      1567,   8276,
+};
+
+/* Table with interplation fractions of 1/24, 3/24, 5/24, ... , 23/24 : 23/24 (46 Words) */
+silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = {
+	{  189,  -600,   617, 30567 },
+	{  117,  -159, -1070, 29704 },
+	{   52,   221, -2392, 28276 },
+	{   -4,   529, -3350, 26341 },
+	{  -48,   758, -3956, 23973 },
+	{  -80,   905, -4235, 21254 },
+	{  -99,   972, -4222, 18278 },
+	{ -107,   967, -3957, 15143 },
+	{ -103,   896, -3487, 11950 },
+	{  -91,   773, -2865,  8798 },
+	{  -71,   611, -2143,  5784 },
+	{  -46,   425, -1375,  2996 },
+};
diff --git a/drivers/opus/silk/resampler_rom.h b/drivers/opus/silk/resampler_rom.h
new file mode 100644
index 0000000000..490b3388dc
--- /dev/null
+++ b/drivers/opus/silk/resampler_rom.h
@@ -0,0 +1,68 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_FIX_RESAMPLER_ROM_H
+#define SILK_FIX_RESAMPLER_ROM_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+#include "typedef.h"
+#include "resampler_structs.h"
+
+#define RESAMPLER_DOWN_ORDER_FIR0               18
+#define RESAMPLER_DOWN_ORDER_FIR1               24
+#define RESAMPLER_DOWN_ORDER_FIR2               36
+#define RESAMPLER_ORDER_FIR_12                  8
+
+/* Tables for 2x downsampler */
+static const opus_int16 silk_resampler_down2_0 = 9872;
+static const opus_int16 silk_resampler_down2_1 = 39809 - 65536;
+
+/* Tables for 2x upsampler, high quality */
+static const opus_int16 silk_resampler_up2_hq_0[ 3 ] = { 1746, 14986, 39083 - 65536 };
+static const opus_int16 silk_resampler_up2_hq_1[ 3 ] = { 6854, 25769, 55542 - 65536 };
+
+/* Tables with IIR and FIR coefficients for fractional downsamplers */
+extern const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ];
+extern const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ];
+extern const opus_int16 silk_Resampler_1_2_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR1 / 2 ];
+extern const opus_int16 silk_Resampler_1_3_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_1_4_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_1_6_COEFS[ 2 +     RESAMPLER_DOWN_ORDER_FIR2 / 2 ];
+extern const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ];
+
+/* Table with interplation fractions of 1/24, 3/24, ..., 23/24 */
+extern const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ];
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_FIX_RESAMPLER_ROM_H */
diff --git a/drivers/opus/silk/resampler_structs.h b/drivers/opus/silk/resampler_structs.h
new file mode 100644
index 0000000000..9e9457d11c
--- /dev/null
+++ b/drivers/opus/silk/resampler_structs.h
@@ -0,0 +1,60 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_RESAMPLER_STRUCTS_H
+#define SILK_RESAMPLER_STRUCTS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SILK_RESAMPLER_MAX_FIR_ORDER                 36
+#define SILK_RESAMPLER_MAX_IIR_ORDER                 6
+
+typedef struct _silk_resampler_state_struct{
+    opus_int32       sIIR[ SILK_RESAMPLER_MAX_IIR_ORDER ]; /* this must be the first element of this struct */
+    union{
+        opus_int32   i32[ SILK_RESAMPLER_MAX_FIR_ORDER ];
+        opus_int16   i16[ SILK_RESAMPLER_MAX_FIR_ORDER ];
+    }                sFIR;
+    opus_int16       delayBuf[ 48 ];
+    opus_int         resampler_function;
+    opus_int         batchSize;
+    opus_int32       invRatio_Q16;
+    opus_int         FIR_Order;
+    opus_int         FIR_Fracs;
+    opus_int         Fs_in_kHz;
+    opus_int         Fs_out_kHz;
+    opus_int         inputDelay;
+    const opus_int16 *Coefs;
+} silk_resampler_state_struct;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SILK_RESAMPLER_STRUCTS_H */
+
diff --git a/drivers/opus/silk/shell_coder.c b/drivers/opus/silk/shell_coder.c
new file mode 100644
index 0000000000..79e392bd98
--- /dev/null
+++ b/drivers/opus/silk/shell_coder.c
@@ -0,0 +1,151 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* shell coder; pulse-subframe length is hardcoded */
+
+static OPUS_INLINE void combine_pulses(
+    opus_int         *out,   /* O    combined pulses vector [len] */
+    const opus_int   *in,    /* I    input vector       [2 * len] */
+    const opus_int   len     /* I    number of OUTPUT samples     */
+)
+{
+    opus_int k;
+    for( k = 0; k < len; k++ ) {
+        out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ];
+    }
+}
+
+static OPUS_INLINE void encode_split(
+    ec_enc                      *psRangeEnc,    /* I/O  compressor data structure                   */
+    const opus_int              p_child1,       /* I    pulse amplitude of first child subframe     */
+    const opus_int              p,              /* I    pulse amplitude of current subframe         */
+    const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
+)
+{
+    if( p > 0 ) {
+        ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
+    }
+}
+
+static OPUS_INLINE void decode_split(
+    opus_int                    *p_child1,      /* O    pulse amplitude of first child subframe     */
+    opus_int                    *p_child2,      /* O    pulse amplitude of second child subframe    */
+    ec_dec                      *psRangeDec,    /* I/O  Compressor data structure                   */
+    const opus_int              p,              /* I    pulse amplitude of current subframe         */
+    const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
+)
+{
+    if( p > 0 ) {
+        p_child1[ 0 ] = ec_dec_icdf( psRangeDec, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
+        p_child2[ 0 ] = p - p_child1[ 0 ];
+    } else {
+        p_child1[ 0 ] = 0;
+        p_child2[ 0 ] = 0;
+    }
+}
+
+/* Shell encoder, operates on one shell code frame of 16 pulses */
+void silk_shell_encoder(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              *pulses0                        /* I    data: nonnegative pulse amplitudes          */
+)
+{
+    opus_int pulses1[ 8 ], pulses2[ 4 ], pulses3[ 2 ], pulses4[ 1 ];
+
+    /* this function operates on one shell code frame of 16 pulses */
+    silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
+
+    /* tree representation per pulse-subframe */
+    combine_pulses( pulses1, pulses0, 8 );
+    combine_pulses( pulses2, pulses1, 4 );
+    combine_pulses( pulses3, pulses2, 2 );
+    combine_pulses( pulses4, pulses3, 1 );
+
+    encode_split( psRangeEnc, pulses3[  0 ], pulses4[ 0 ], silk_shell_code_table3 );
+
+    encode_split( psRangeEnc, pulses2[  0 ], pulses3[ 0 ], silk_shell_code_table2 );
+
+    encode_split( psRangeEnc, pulses1[  0 ], pulses2[ 0 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[  0 ], pulses1[ 0 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[  2 ], pulses1[ 1 ], silk_shell_code_table0 );
+
+    encode_split( psRangeEnc, pulses1[  2 ], pulses2[ 1 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[  4 ], pulses1[ 2 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[  6 ], pulses1[ 3 ], silk_shell_code_table0 );
+
+    encode_split( psRangeEnc, pulses2[  2 ], pulses3[ 1 ], silk_shell_code_table2 );
+
+    encode_split( psRangeEnc, pulses1[  4 ], pulses2[ 2 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[  8 ], pulses1[ 4 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[ 10 ], pulses1[ 5 ], silk_shell_code_table0 );
+
+    encode_split( psRangeEnc, pulses1[  6 ], pulses2[ 3 ], silk_shell_code_table1 );
+    encode_split( psRangeEnc, pulses0[ 12 ], pulses1[ 6 ], silk_shell_code_table0 );
+    encode_split( psRangeEnc, pulses0[ 14 ], pulses1[ 7 ], silk_shell_code_table0 );
+}
+
+
+/* Shell decoder, operates on one shell code frame of 16 pulses */
+void silk_shell_decoder(
+    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
+)
+{
+    opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
+
+    /* this function operates on one shell code frame of 16 pulses */
+    silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
+
+    decode_split( &pulses3[  0 ], &pulses3[  1 ], psRangeDec, pulses4,      silk_shell_code_table3 );
+
+    decode_split( &pulses2[  0 ], &pulses2[  1 ], psRangeDec, pulses3[ 0 ], silk_shell_code_table2 );
+
+    decode_split( &pulses1[  0 ], &pulses1[  1 ], psRangeDec, pulses2[ 0 ], silk_shell_code_table1 );
+    decode_split( &pulses0[  0 ], &pulses0[  1 ], psRangeDec, pulses1[ 0 ], silk_shell_code_table0 );
+    decode_split( &pulses0[  2 ], &pulses0[  3 ], psRangeDec, pulses1[ 1 ], silk_shell_code_table0 );
+
+    decode_split( &pulses1[  2 ], &pulses1[  3 ], psRangeDec, pulses2[ 1 ], silk_shell_code_table1 );
+    decode_split( &pulses0[  4 ], &pulses0[  5 ], psRangeDec, pulses1[ 2 ], silk_shell_code_table0 );
+    decode_split( &pulses0[  6 ], &pulses0[  7 ], psRangeDec, pulses1[ 3 ], silk_shell_code_table0 );
+
+    decode_split( &pulses2[  2 ], &pulses2[  3 ], psRangeDec, pulses3[ 1 ], silk_shell_code_table2 );
+
+    decode_split( &pulses1[  4 ], &pulses1[  5 ], psRangeDec, pulses2[ 2 ], silk_shell_code_table1 );
+    decode_split( &pulses0[  8 ], &pulses0[  9 ], psRangeDec, pulses1[ 4 ], silk_shell_code_table0 );
+    decode_split( &pulses0[ 10 ], &pulses0[ 11 ], psRangeDec, pulses1[ 5 ], silk_shell_code_table0 );
+
+    decode_split( &pulses1[  6 ], &pulses1[  7 ], psRangeDec, pulses2[ 3 ], silk_shell_code_table1 );
+    decode_split( &pulses0[ 12 ], &pulses0[ 13 ], psRangeDec, pulses1[ 6 ], silk_shell_code_table0 );
+    decode_split( &pulses0[ 14 ], &pulses0[ 15 ], psRangeDec, pulses1[ 7 ], silk_shell_code_table0 );
+}
diff --git a/drivers/opus/silk/sigm_Q15.c b/drivers/opus/silk/sigm_Q15.c
new file mode 100644
index 0000000000..2df5b9695c
--- /dev/null
+++ b/drivers/opus/silk/sigm_Q15.c
@@ -0,0 +1,76 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Approximate sigmoid function */
+
+#include "SigProc_FIX.h"
+
+/* fprintf(1, '%d, ', round(1024 * ([1 ./ (1 + exp(-(1:5))), 1] - 1 ./ (1 + exp(-(0:5)))))); */
+static const opus_int32 sigm_LUT_slope_Q10[ 6 ] = {
+    237, 153, 73, 30, 12, 7
+};
+/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp(-(0:5))))); */
+static const opus_int32 sigm_LUT_pos_Q15[ 6 ] = {
+    16384, 23955, 28861, 31213, 32178, 32548
+};
+/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp((0:5))))); */
+static const opus_int32 sigm_LUT_neg_Q15[ 6 ] = {
+    16384, 8812, 3906, 1554, 589, 219
+};
+
+opus_int silk_sigm_Q15(
+    opus_int                    in_Q5               /* I                                                                */
+)
+{
+    opus_int ind;
+
+    if( in_Q5 < 0 ) {
+        /* Negative input */
+        in_Q5 = -in_Q5;
+        if( in_Q5 >= 6 * 32 ) {
+            return 0;        /* Clip */
+        } else {
+            /* Linear interpolation of look up table */
+            ind = silk_RSHIFT( in_Q5, 5 );
+            return( sigm_LUT_neg_Q15[ ind ] - silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) );
+        }
+    } else {
+        /* Positive input */
+        if( in_Q5 >= 6 * 32 ) {
+            return 32767;        /* clip */
+        } else {
+            /* Linear interpolation of look up table */
+            ind = silk_RSHIFT( in_Q5, 5 );
+            return( sigm_LUT_pos_Q15[ ind ] + silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) );
+        }
+    }
+}
+
diff --git a/drivers/opus/silk/silk_main.h b/drivers/opus/silk/silk_main.h
new file mode 100644
index 0000000000..2bdf89784d
--- /dev/null
+++ b/drivers/opus/silk/silk_main.h
@@ -0,0 +1,438 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MAIN_H
+#define SILK_MAIN_H
+
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "structs.h"
+#include "tables.h"
+#include "PLC.h"
+#include "control.h"
+#include "debug.h"
+#include "entenc.h"
+#include "entdec.h"
+
+/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
+void silk_stereo_LR_to_MS(
+    stereo_enc_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    opus_int8                   ix[ 2 ][ 3 ],                   /* O    Quantization indices                        */
+    opus_int8                   *mid_only_flag,                 /* O    Flag: only mid signal coded                 */
+    opus_int32                  mid_side_rates_bps[],           /* O    Bitrates for mid and side signals           */
+    opus_int32                  total_rate_bps,                 /* I    Total bitrate                               */
+    opus_int                    prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
+    opus_int                    toMono,                         /* I    Last frame before a stereo->mono transition */
+    opus_int                    fs_kHz,                         /* I    Sample rate (kHz)                           */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+);
+
+/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
+void silk_stereo_MS_to_LR(
+    stereo_dec_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    const opus_int32            pred_Q13[],                     /* I    Predictors                                  */
+    opus_int                    fs_kHz,                         /* I    Samples rate (kHz)                          */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+);
+
+/* Find least-squares prediction gain for one signal based on another and quantize it */
+opus_int32 silk_stereo_find_predictor(                          /* O    Returns predictor in Q13                    */
+    opus_int32                  *ratio_Q14,                     /* O    Ratio of residual and mid energies          */
+    const opus_int16            x[],                            /* I    Basis signal                                */
+    const opus_int16            y[],                            /* I    Target signal                               */
+    opus_int32                  mid_res_amp_Q0[],               /* I/O  Smoothed mid, residual norms                */
+    opus_int                    length,                         /* I    Number of samples                           */
+    opus_int                    smooth_coef_Q16                 /* I    Smoothing coefficient                       */
+);
+
+/* Quantize mid/side predictors */
+void silk_stereo_quant_pred(
+    opus_int32                  pred_Q13[],                     /* I/O  Predictors (out: quantized)                 */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* O    Quantization indices                        */
+);
+
+/* Entropy code the mid/side quantization indices */
+void silk_stereo_encode_pred(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* I    Quantization indices                        */
+);
+
+/* Entropy code the mid-only flag */
+void silk_stereo_encode_mid_only(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   mid_only_flag
+);
+
+/* Decode mid/side predictors */
+void silk_stereo_decode_pred(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int32                  pred_Q13[]                      /* O    Predictors                                  */
+);
+
+/* Decode mid-only flag */
+void silk_stereo_decode_mid_only(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    *decode_only_mid                /* O    Flag that only mid channel has been coded   */
+);
+
+/* Encodes signs of excitation */
+void silk_encode_signs(
+    ec_enc                      *psRangeEnc,                        /* I/O  Compressor data structure                   */
+    const opus_int8             pulses[],                           /* I    pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+);
+
+/* Decodes signs of excitation */
+void silk_decode_signs(
+    ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int                    length,                             /* I    length of input                             */
+    const opus_int              signalType,                         /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
+    const opus_int              sum_pulses[ MAX_NB_SHELL_BLOCKS ]   /* I    Sum of absolute pulses per block            */
+);
+
+/* Check encoder control struct */
+opus_int check_control_input(
+    silk_EncControlStruct        *encControl                    /* I    Control structure                           */
+);
+
+/* Control internal sampling rate */
+opus_int silk_control_audio_bandwidth(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    silk_EncControlStruct       *encControl                     /* I    Control structure                           */
+);
+
+/* Control SNR of redidual quantizer */
+opus_int silk_control_SNR(
+    silk_encoder_state          *psEncC,                        /* I/O  Pointer to Silk encoder state               */
+    opus_int32                  TargetRate_bps                  /* I    Target max bitrate (bps)                    */
+);
+
+/***************/
+/* Shell coder */
+/***************/
+
+/* Encode quantization indices of excitation */
+void silk_encode_pulses(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              signalType,                     /* I    Signal type                                 */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    opus_int8                   pulses[],                       /* I    quantization indices                        */
+    const opus_int              frame_length                    /* I    Frame length                                */
+);
+
+/* Shell encoder, operates on one shell code frame of 16 pulses */
+void silk_shell_encoder(
+    ec_enc                      *psRangeEnc,                    /* I/O  compressor data structure                   */
+    const opus_int              *pulses0                        /* I    data: nonnegative pulse amplitudes          */
+);
+
+/* Shell decoder, operates on one shell code frame of 16 pulses */
+void silk_shell_decoder(
+    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
+);
+
+/* Gain scalar quantization with hysteresis, uniform on log scale */
+void silk_gains_quant(
+    opus_int8                   ind[ MAX_NB_SUBFR ],            /* O    gain indices                                */
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* I/O  gains (quantized out)                       */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+);
+
+/* Gains scalar dequantization, uniform on log scale */
+void silk_gains_dequant(
+    opus_int32                  gain_Q16[ MAX_NB_SUBFR ],       /* O    quantized gains                             */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    opus_int8                   *prev_ind,                      /* I/O  last index in previous frame                */
+    const opus_int              conditional,                    /* I    first gain is delta coded if 1              */
+    const opus_int              nb_subfr                        /* I    number of subframes                          */
+);
+
+/* Compute unique identifier of gain indices vector */
+opus_int32 silk_gains_ID(                                       /* O    returns unique identifier of gains          */
+    const opus_int8             ind[ MAX_NB_SUBFR ],            /* I    gain indices                                */
+    const opus_int              nb_subfr                        /* I    number of subframes                         */
+);
+
+/* Interpolate two vectors */
+void silk_interpolate(
+    opus_int16                  xi[ MAX_LPC_ORDER ],            /* O    interpolated vector                         */
+    const opus_int16            x0[ MAX_LPC_ORDER ],            /* I    first vector                                */
+    const opus_int16            x1[ MAX_LPC_ORDER ],            /* I    second vector                               */
+    const opus_int              ifact_Q2,                       /* I    interp. factor, weight on 2nd vector        */
+    const opus_int              d                               /* I    number of parameters                        */
+);
+
+/* LTP tap quantizer */
+void silk_quant_LTP_gains(
+    opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
+    opus_int8                   cbk_index[ MAX_NB_SUBFR ],                  /* O    Codebook Index                  */
+    opus_int8                   *periodicity_index,                         /* O    Periodicity Index               */
+	opus_int32					*sum_gain_dB_Q7,							/* I/O  Cumulative max prediction gain  */
+    const opus_int32            W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ],  /* I    Error Weights in Q18            */
+    opus_int                    mu_Q9,                                      /* I    Mu value (R/D tradeoff)         */
+    opus_int                    lowComplexity,                              /* I    Flag for low complexity         */
+    const opus_int              nb_subfr                                    /* I    number of subframes             */
+);
+
+/* Entropy constrained matrix-weighted VQ, for a single input data vector */
+void silk_VQ_WMat_EC(
+    opus_int8                   *ind,                           /* O    index of best codebook vector               */
+    opus_int32                  *rate_dist_Q14,                 /* O    best weighted quant error + mu * rate       */
+    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
+    const opus_int16            *in_Q14,                        /* I    input vector to be quantized                */
+    const opus_int32            *W_Q18,                         /* I    weighting matrix                            */
+    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
+    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
+    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
+    const opus_int              mu_Q9,                          /* I    tradeoff betw. weighted error and rate      */
+    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
+    opus_int                    L                               /* I    number of vectors in codebook               */
+);
+
+/************************************/
+/* Noise shaping quantization (NSQ) */
+/************************************/
+void silk_NSQ(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+/* Noise shaping using delayed decision */
+void silk_NSQ_del_dec(
+    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+);
+
+/************/
+/* Silk VAD */
+/************/
+/* Initialize the Silk VAD */
+opus_int silk_VAD_Init(                                         /* O    Return value, 0 if success                  */
+    silk_VAD_state              *psSilk_VAD                     /* I/O  Pointer to Silk VAD state                   */
+);
+
+/* Get speech activity level in Q8 */
+opus_int silk_VAD_GetSA_Q8(                                     /* O    Return value, 0 if success                  */
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    const opus_int16            pIn[]                           /* I    PCM input                                   */
+);
+
+/* Low-pass filter with variable cutoff frequency based on  */
+/* piece-wise linear interpolation between elliptic filters */
+/* Start by setting transition_frame_no = 1;                */
+void silk_LP_variable_cutoff(
+    silk_LP_state               *psLP,                          /* I/O  LP filter state                             */
+    opus_int16                  *frame,                         /* I/O  Low-pass filtered output signal             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+);
+
+/******************/
+/* NLSF Quantizer */
+/******************/
+/* Limit, stabilize, convert and quantize NLSFs */
+void silk_process_NLSFs(
+    silk_encoder_state          *psEncC,                            /* I/O  Encoder state                               */
+    opus_int16                  PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O    Prediction coefficients                     */
+    opus_int16                  pNLSF_Q15[         MAX_LPC_ORDER ], /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
+    const opus_int16            prev_NLSFq_Q15[    MAX_LPC_ORDER ]  /* I    Previous Normalized LSFs (0 - (2^15-1))     */
+);
+
+opus_int32 silk_NLSF_encode(                                    /* O    Returns RD value in Q25                     */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+          opus_int16            *pNLSF_Q15,                     /* I/O  Quantized NLSF vector [ LPC_ORDER ]         */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int16            *pW_QW,                         /* I    NLSF weight vector [ LPC_ORDER ]            */
+    const opus_int              NLSF_mu_Q20,                    /* I    Rate weight for the RD optimization         */
+    const opus_int              nSurvivors,                     /* I    Max survivors after first stage             */
+    const opus_int              signalType                      /* I    Signal type: 0/1/2                          */
+);
+
+/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
+void silk_NLSF_VQ(
+    opus_int32                  err_Q26[],                      /* O    Quantization errors [K]                     */
+    const opus_int16            in_Q15[],                       /* I    Input vectors to be quantized [LPC_order]   */
+    const opus_uint8            pCB_Q8[],                       /* I    Codebook vectors [K*LPC_order]              */
+    const opus_int              K,                              /* I    Number of codebook vectors                  */
+    const opus_int              LPC_order                       /* I    Number of LPCs                              */
+);
+
+/* Delayed-decision quantizer for NLSF residuals */
+opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns RD value in Q25                     */
+    opus_int8                   indices[],                      /* O    Quantization indices [ order ]              */
+    const opus_int16            x_Q10[],                        /* I    Input [ order ]                             */
+    const opus_int16            w_Q5[],                         /* I    Weights [ order ]                           */
+    const opus_uint8            pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int16            ec_ix[],                        /* I    Indices to entropy coding tables [ order ]  */
+    const opus_uint8            ec_rates_Q5[],                  /* I    Rates []                                    */
+    const opus_int              quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16            inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
+    const opus_int32            mu_Q20,                         /* I    R/D tradeoff                                */
+    const opus_int16            order                           /* I    Number of input values                      */
+);
+
+/* Unpack predictor values and indices for entropy coding tables */
+void silk_NLSF_unpack(
+          opus_int16            ec_ix[],                        /* O    Indices to entropy tables [ LPC_ORDER ]     */
+          opus_uint8            pred_Q8[],                      /* O    LSF predictor [ LPC_ORDER ]                 */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int              CB1_index                       /* I    Index of vector in first LSF codebook       */
+);
+
+/***********************/
+/* NLSF vector decoder */
+/***********************/
+void silk_NLSF_decode(
+          opus_int16            *pNLSF_Q15,                     /* O    Quantized NLSF vector [ LPC_ORDER ]         */
+          opus_int8             *NLSFIndices,                   /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
+    const silk_NLSF_CB_struct   *psNLSF_CB                      /* I    Codebook object                             */
+);
+
+/****************************************************/
+/* Decoder Functions                                */
+/****************************************************/
+opus_int silk_init_decoder(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state pointer                       */
+);
+
+/* Set decoder sampling rate */
+opus_int silk_decoder_set_fs(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state pointer                       */
+    opus_int                    fs_kHz,                         /* I    Sampling frequency (kHz)                    */
+    opus_int32                  fs_API_Hz                       /* I    API Sampling frequency (Hz)                 */
+);
+
+/****************/
+/* Decode frame */
+/****************/
+opus_int silk_decode_frame(
+    silk_decoder_state          *psDec,                         /* I/O  Pointer to Silk decoder state               */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int16                  pOut[],                         /* O    Pointer to output speech frame              */
+    opus_int32                  *pN,                            /* O    Pointer to size of output frame             */
+    opus_int                    lostFlag,                       /* I    0: no loss, 1 loss, 2 decode fec            */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+/* Decode indices from bitstream */
+void silk_decode_indices(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    decode_LBRR,                    /* I    Flag indicating LBRR data is being decoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+/* Decode parameters from payload */
+void silk_decode_parameters(
+    silk_decoder_state          *psDec,                         /* I/O  State                                       */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+/* Core decoder. Performs inverse NSQ operation LTP + LPC */
+void silk_decode_core(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
+    opus_int16                  xq[],                           /* O    Decoded speech                              */
+    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+);
+
+/* Decode quantization indices of excitation (Shell coding) */
+void silk_decode_pulses(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    const opus_int              signalType,                     /* I    Sigtype                                     */
+    const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
+    const opus_int              frame_length                    /* I    Frame length                                */
+);
+
+/******************/
+/* CNG */
+/******************/
+
+/* Reset CNG */
+void silk_CNG_Reset(
+    silk_decoder_state          *psDec                          /* I/O  Decoder state                               */
+);
+
+/* Updates CNG estimate, and applies the CNG when packet was lost */
+void silk_CNG(
+    silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
+    silk_decoder_control        *psDecCtrl,                     /* I/O  Decoder control                             */
+    opus_int16                  frame[],                        /* I/O  Signal                                      */
+    opus_int                    length                          /* I    Length of residual                          */
+);
+
+/* Encoding of various parameters */
+void silk_encode_indices(
+    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int                    FrameIndex,                     /* I    Frame number                                */
+    opus_int                    encode_LBRR,                    /* I    Flag indicating LBRR data is being encoded  */
+    opus_int                    condCoding                      /* I    The type of conditional coding to use       */
+);
+
+#endif
diff --git a/drivers/opus/silk/sort.c b/drivers/opus/silk/sort.c
new file mode 100644
index 0000000000..5e9ba08616
--- /dev/null
+++ b/drivers/opus/silk/sort.c
@@ -0,0 +1,154 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* Insertion sort (fast for already almost sorted arrays):   */
+/* Best case:  O(n)   for an already sorted array            */
+/* Worst case: O(n^2) for an inversely sorted array          */
+/*                                                           */
+/* Shell short:    http://en.wikipedia.org/wiki/Shell_sort   */
+
+#include "SigProc_FIX.h"
+
+void silk_insertion_sort_increasing(
+    opus_int32           *a,             /* I/O   Unsorted / Sorted vector               */
+    opus_int             *idx,           /* O     Index vector for the sorted elements   */
+    const opus_int       L,              /* I     Vector length                          */
+    const opus_int       K               /* I     Number of correctly sorted positions   */
+)
+{
+    opus_int32    value;
+    opus_int        i, j;
+
+    /* Safety checks */
+    silk_assert( K >  0 );
+    silk_assert( L >  0 );
+    silk_assert( L >= K );
+
+    /* Write start indices in index vector */
+    for( i = 0; i < K; i++ ) {
+        idx[ i ] = i;
+    }
+
+    /* Sort vector elements by value, increasing order */
+    for( i = 1; i < K; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+            a[ j + 1 ]   = a[ j ];       /* Shift value */
+            idx[ j + 1 ] = idx[ j ];     /* Shift index */
+        }
+        a[ j + 1 ]   = value;   /* Write value */
+        idx[ j + 1 ] = i;       /* Write index */
+    }
+
+    /* If less than L values are asked for, check the remaining values, */
+    /* but only spend CPU to ensure that the K first values are correct */
+    for( i = K; i < L; i++ ) {
+        value = a[ i ];
+        if( value < a[ K - 1 ] ) {
+            for( j = K - 2; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+                a[ j + 1 ]   = a[ j ];       /* Shift value */
+                idx[ j + 1 ] = idx[ j ];     /* Shift index */
+            }
+            a[ j + 1 ]   = value;   /* Write value */
+            idx[ j + 1 ] = i;       /* Write index */
+        }
+    }
+}
+
+#ifdef OPUS_FIXED_POINT
+/* This function is only used by the fixed-point build */
+void silk_insertion_sort_decreasing_int16(
+    opus_int16                  *a,                 /* I/O   Unsorted / Sorted vector                                   */
+    opus_int                    *idx,               /* O     Index vector for the sorted elements                       */
+    const opus_int              L,                  /* I     Vector length                                              */
+    const opus_int              K                   /* I     Number of correctly sorted positions                       */
+)
+{
+    opus_int i, j;
+    opus_int value;
+
+    /* Safety checks */
+    silk_assert( K >  0 );
+    silk_assert( L >  0 );
+    silk_assert( L >= K );
+
+    /* Write start indices in index vector */
+    for( i = 0; i < K; i++ ) {
+        idx[ i ] = i;
+    }
+
+    /* Sort vector elements by value, decreasing order */
+    for( i = 1; i < K; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+            a[ j + 1 ]   = a[ j ];     /* Shift value */
+            idx[ j + 1 ] = idx[ j ];   /* Shift index */
+        }
+        a[ j + 1 ]   = value;   /* Write value */
+        idx[ j + 1 ] = i;       /* Write index */
+    }
+
+    /* If less than L values are asked for, check the remaining values, */
+    /* but only spend CPU to ensure that the K first values are correct */
+    for( i = K; i < L; i++ ) {
+        value = a[ i ];
+        if( value > a[ K - 1 ] ) {
+            for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) {
+                a[ j + 1 ]   = a[ j ];     /* Shift value */
+                idx[ j + 1 ] = idx[ j ];   /* Shift index */
+            }
+            a[ j + 1 ]   = value;   /* Write value */
+            idx[ j + 1 ] = i;       /* Write index */
+        }
+    }
+}
+#endif
+
+void silk_insertion_sort_increasing_all_values_int16(
+     opus_int16                 *a,                 /* I/O   Unsorted / Sorted vector                                   */
+     const opus_int             L                   /* I     Vector length                                              */
+)
+{
+    opus_int    value;
+    opus_int    i, j;
+
+    /* Safety checks */
+    silk_assert( L >  0 );
+
+    /* Sort vector elements by value, increasing order */
+    for( i = 1; i < L; i++ ) {
+        value = a[ i ];
+        for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) {
+            a[ j + 1 ] = a[ j ]; /* Shift value */
+        }
+        a[ j + 1 ] = value; /* Write value */
+    }
+}
diff --git a/drivers/opus/silk/stereo_LR_to_MS.c b/drivers/opus/silk/stereo_LR_to_MS.c
new file mode 100644
index 0000000000..678f46984b
--- /dev/null
+++ b/drivers/opus/silk/stereo_LR_to_MS.c
@@ -0,0 +1,229 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
+void silk_stereo_LR_to_MS(
+    stereo_enc_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    opus_int8                   ix[ 2 ][ 3 ],                   /* O    Quantization indices                        */
+    opus_int8                   *mid_only_flag,                 /* O    Flag: only mid signal coded                 */
+    opus_int32                  mid_side_rates_bps[],           /* O    Bitrates for mid and side signals           */
+    opus_int32                  total_rate_bps,                 /* I    Total bitrate                               */
+    opus_int                    prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
+    opus_int                    toMono,                         /* I    Last frame before a stereo->mono transition */
+    opus_int                    fs_kHz,                         /* I    Sample rate (kHz)                           */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+)
+{
+    opus_int   n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
+    opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;
+    opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
+    VARDECL( opus_int16, side );
+    VARDECL( opus_int16, LP_mid );
+    VARDECL( opus_int16, HP_mid );
+    VARDECL( opus_int16, LP_side );
+    VARDECL( opus_int16, HP_side );
+    opus_int16 *mid = &x1[ -2 ];
+    SAVE_STACK;
+
+    ALLOC( side, frame_length + 2, opus_int16 );
+    /* Convert to basic mid/side signals */
+    for( n = 0; n < frame_length + 2; n++ ) {
+        sum  = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];
+        diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ];
+        mid[  n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
+        side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) );
+    }
+
+    /* Buffering */
+    silk_memcpy( mid,  state->sMid,  2 * sizeof( opus_int16 ) );
+    silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sMid,  &mid[  frame_length ], 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
+
+    /* LP and HP filter mid signal */
+    ALLOC( LP_mid, frame_length, opus_int16 );
+    ALLOC( HP_mid, frame_length, opus_int16 );
+    for( n = 0; n < frame_length; n++ ) {
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
+        LP_mid[ n ] = sum;
+        HP_mid[ n ] = mid[ n + 1 ] - sum;
+    }
+
+    /* LP and HP filter side signal */
+    ALLOC( LP_side, frame_length, opus_int16 );
+    ALLOC( HP_side, frame_length, opus_int16 );
+    for( n = 0; n < frame_length; n++ ) {
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
+        LP_side[ n ] = sum;
+        HP_side[ n ] = side[ n + 1 ] - sum;
+    }
+
+    /* Find energies and predictors */
+    is10msFrame = frame_length == 10 * fs_kHz;
+    smooth_coef_Q16 = is10msFrame ?
+        SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
+        SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF,     16 );
+    smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );
+
+    pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
+    pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
+    /* Ratio of the norms of residual and mid signals */
+    frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
+    frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );
+
+    /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
+    total_rate_bps -= is10msFrame ? 1200 : 600;      /* Subtract approximate bitrate for coding stereo parameters */
+    if( total_rate_bps < 1 ) {
+        total_rate_bps = 1;
+    }
+    min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
+    silk_assert( min_mid_rate_bps < 32767 );
+    /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */
+    frac_3_Q16 = silk_MUL( 3, frac_Q16 );
+    mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
+    /* If Mid bitrate below minimum, reduce stereo width */
+    if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
+        mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
+        mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
+        /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
+        width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps,
+            silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 );
+        width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) );
+    } else {
+        mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
+        width_Q14 = SILK_FIX_CONST( 1, 14 );
+    }
+
+    /* Smoother */
+    state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 );
+
+    /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
+    *mid_only_flag = 0;
+    if( toMono ) {
+        /* Last frame before stereo->mono transition; collapse stereo width */
+        width_Q14 = 0;
+        pred_Q13[ 0 ] = 0;
+        pred_Q13[ 1 ] = 0;
+        silk_stereo_quant_pred( pred_Q13, ix );
+    } else if( state->width_prev_Q14 == 0 &&
+        ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
+    {
+        /* Code as panned-mono; previous frame already had zero width */
+        /* Scale down and quantize predictors */
+        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+        silk_stereo_quant_pred( pred_Q13, ix );
+        /* Collapse stereo width */
+        width_Q14 = 0;
+        pred_Q13[ 0 ] = 0;
+        pred_Q13[ 1 ] = 0;
+        mid_side_rates_bps[ 0 ] = total_rate_bps;
+        mid_side_rates_bps[ 1 ] = 0;
+        *mid_only_flag = 1;
+    } else if( state->width_prev_Q14 != 0 &&
+        ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) )
+    {
+        /* Transition to zero-width stereo */
+        /* Scale down and quantize predictors */
+        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+        silk_stereo_quant_pred( pred_Q13, ix );
+        /* Collapse stereo width */
+        width_Q14 = 0;
+        pred_Q13[ 0 ] = 0;
+        pred_Q13[ 1 ] = 0;
+    } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) {
+        /* Full-width stereo coding */
+        silk_stereo_quant_pred( pred_Q13, ix );
+        width_Q14 = SILK_FIX_CONST( 1, 14 );
+    } else {
+        /* Reduced-width stereo coding; scale down and quantize predictors */
+        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
+        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
+        silk_stereo_quant_pred( pred_Q13, ix );
+        width_Q14 = state->smth_width_Q14;
+    }
+
+    /* Make sure to keep on encoding until the tapered output has been transmitted */
+    if( *mid_only_flag == 1 ) {
+        state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;
+        if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {
+            *mid_only_flag = 0;
+        } else {
+            /* Limit to avoid wrapping around */
+            state->silent_side_len = 10000;
+        }
+    } else {
+        state->silent_side_len = 0;
+    }
+
+    if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) {
+        mid_side_rates_bps[ 1 ] = 1;
+        mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);
+    }
+
+    /* Interpolate predictors and subtract prediction from side channel */
+    pred0_Q13  = -state->pred_prev_Q13[ 0 ];
+    pred1_Q13  = -state->pred_prev_Q13[ 1 ];
+    w_Q24      =  silk_LSHIFT( state->width_prev_Q14, 10 );
+    denom_Q16  = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
+    delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
+    delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
+    deltaw_Q24 =  silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
+    for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
+        pred0_Q13 += delta0_Q13;
+        pred1_Q13 += delta1_Q13;
+        w_Q24   += deltaw_Q24;
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
+        x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+
+    pred0_Q13 = -pred_Q13[ 0 ];
+    pred1_Q13 = -pred_Q13[ 1 ];
+    w_Q24     =  silk_LSHIFT( width_Q14, 10 );
+    for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
+        x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+    state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
+    state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
+    state->width_prev_Q14     = (opus_int16)width_Q14;
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/stereo_MS_to_LR.c b/drivers/opus/silk/stereo_MS_to_LR.c
new file mode 100644
index 0000000000..34f43cf795
--- /dev/null
+++ b/drivers/opus/silk/stereo_MS_to_LR.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
+void silk_stereo_MS_to_LR(
+    stereo_dec_state            *state,                         /* I/O  State                                       */
+    opus_int16                  x1[],                           /* I/O  Left input signal, becomes mid signal       */
+    opus_int16                  x2[],                           /* I/O  Right input signal, becomes side signal     */
+    const opus_int32            pred_Q13[],                     /* I    Predictors                                  */
+    opus_int                    fs_kHz,                         /* I    Samples rate (kHz)                          */
+    opus_int                    frame_length                    /* I    Number of samples                           */
+)
+{
+    opus_int   n, denom_Q16, delta0_Q13, delta1_Q13;
+    opus_int32 sum, diff, pred0_Q13, pred1_Q13;
+
+    /* Buffering */
+    silk_memcpy( x1, state->sMid,  2 * sizeof( opus_int16 ) );
+    silk_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sMid,  &x1[ frame_length ], 2 * sizeof( opus_int16 ) );
+    silk_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) );
+
+    /* Interpolate predictors and add prediction to side channel */
+    pred0_Q13  = state->pred_prev_Q13[ 0 ];
+    pred1_Q13  = state->pred_prev_Q13[ 1 ];
+    denom_Q16  = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
+    delta0_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
+    delta1_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
+    for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
+        pred0_Q13 += delta0_Q13;
+        pred1_Q13 += delta1_Q13;
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 );       /* Q11 */
+        sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 );         /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 );        /* Q8  */
+        x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+    pred0_Q13 = pred_Q13[ 0 ];
+    pred1_Q13 = pred_Q13[ 1 ];
+    for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 );       /* Q11 */
+        sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 );         /* Q8  */
+        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 );        /* Q8  */
+        x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
+    }
+    state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ];
+    state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ];
+
+    /* Convert to left/right signals */
+    for( n = 0; n < frame_length; n++ ) {
+        sum  = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ];
+        diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ];
+        x1[ n + 1 ] = (opus_int16)silk_SAT16( sum );
+        x2[ n + 1 ] = (opus_int16)silk_SAT16( diff );
+    }
+}
diff --git a/drivers/opus/silk/stereo_decode_pred.c b/drivers/opus/silk/stereo_decode_pred.c
new file mode 100644
index 0000000000..56d94e56fe
--- /dev/null
+++ b/drivers/opus/silk/stereo_decode_pred.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Decode mid/side predictors */
+void silk_stereo_decode_pred(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int32                  pred_Q13[]                      /* O    Predictors                                  */
+)
+{
+    opus_int   n, ix[ 2 ][ 3 ];
+    opus_int32 low_Q13, step_Q13;
+
+    /* Entropy decoding */
+    n = ec_dec_icdf( psRangeDec, silk_stereo_pred_joint_iCDF, 8 );
+    ix[ 0 ][ 2 ] = silk_DIV32_16( n, 5 );
+    ix[ 1 ][ 2 ] = n - 5 * ix[ 0 ][ 2 ];
+    for( n = 0; n < 2; n++ ) {
+        ix[ n ][ 0 ] = ec_dec_icdf( psRangeDec, silk_uniform3_iCDF, 8 );
+        ix[ n ][ 1 ] = ec_dec_icdf( psRangeDec, silk_uniform5_iCDF, 8 );
+    }
+
+    /* Dequantize */
+    for( n = 0; n < 2; n++ ) {
+        ix[ n ][ 0 ] += 3 * ix[ n ][ 2 ];
+        low_Q13 = silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] ];
+        step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] + 1 ] - low_Q13,
+            SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) );
+        pred_Q13[ n ] = silk_SMLABB( low_Q13, step_Q13, 2 * ix[ n ][ 1 ] + 1 );
+    }
+
+    /* Subtract second from first predictor (helps when actually applying these) */
+    pred_Q13[ 0 ] -= pred_Q13[ 1 ];
+}
+
+/* Decode mid-only flag */
+void silk_stereo_decode_mid_only(
+    ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
+    opus_int                    *decode_only_mid                /* O    Flag that only mid channel has been coded   */
+)
+{
+    /* Decode flag that only mid channel is coded */
+    *decode_only_mid = ec_dec_icdf( psRangeDec, silk_stereo_only_code_mid_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/stereo_encode_pred.c b/drivers/opus/silk/stereo_encode_pred.c
new file mode 100644
index 0000000000..bfe75b08e4
--- /dev/null
+++ b/drivers/opus/silk/stereo_encode_pred.c
@@ -0,0 +1,62 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Entropy code the mid/side quantization indices */
+void silk_stereo_encode_pred(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* I    Quantization indices                        */
+)
+{
+    opus_int   n;
+
+    /* Entropy coding */
+    n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ];
+    silk_assert( n < 25 );
+    ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 );
+    for( n = 0; n < 2; n++ ) {
+        silk_assert( ix[ n ][ 0 ] < 3 );
+        silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS );
+        ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 );
+        ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 );
+    }
+}
+
+/* Entropy code the mid-only flag */
+void silk_stereo_encode_mid_only(
+    ec_enc                      *psRangeEnc,                    /* I/O  Compressor data structure                   */
+    opus_int8                   mid_only_flag
+)
+{
+    /* Encode flag that only mid channel is coded */
+    ec_enc_icdf( psRangeEnc, mid_only_flag, silk_stereo_only_code_mid_iCDF, 8 );
+}
diff --git a/drivers/opus/silk/stereo_find_predictor.c b/drivers/opus/silk/stereo_find_predictor.c
new file mode 100644
index 0000000000..266293dff3
--- /dev/null
+++ b/drivers/opus/silk/stereo_find_predictor.c
@@ -0,0 +1,79 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Find least-squares prediction gain for one signal based on another and quantize it */
+opus_int32 silk_stereo_find_predictor(                          /* O    Returns predictor in Q13                    */
+    opus_int32                  *ratio_Q14,                     /* O    Ratio of residual and mid energies          */
+    const opus_int16            x[],                            /* I    Basis signal                                */
+    const opus_int16            y[],                            /* I    Target signal                               */
+    opus_int32                  mid_res_amp_Q0[],               /* I/O  Smoothed mid, residual norms                */
+    opus_int                    length,                         /* I    Number of samples                           */
+    opus_int                    smooth_coef_Q16                 /* I    Smoothing coefficient                       */
+)
+{
+    opus_int   scale, scale1, scale2;
+    opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10;
+
+    /* Find  predictor */
+    silk_sum_sqr_shift( &nrgx, &scale1, x, length );
+    silk_sum_sqr_shift( &nrgy, &scale2, y, length );
+    scale = silk_max_int( scale1, scale2 );
+    scale = scale + ( scale & 1 );          /* make even */
+    nrgy = silk_RSHIFT32( nrgy, scale - scale2 );
+    nrgx = silk_RSHIFT32( nrgx, scale - scale1 );
+    nrgx = silk_max_int( nrgx, 1 );
+    corr = silk_inner_prod_aligned_scale( x, y, scale, length );
+    pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 );
+    pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 );
+    pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 );
+
+    /* Faster update for signals with large prediction parameters */
+    smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) );
+
+    /* Smoothed mid and residual norms */
+    silk_assert( smooth_coef_Q16 < 32768 );
+    scale = silk_RSHIFT( scale, 1 );
+    mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ],
+        smooth_coef_Q16 );
+    /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */
+    nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 );
+    nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 );
+    mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ],
+        smooth_coef_Q16 );
+
+    /* Ratio of smoothed residual and mid norms */
+    *ratio_Q14 = silk_DIV32_varQ( mid_res_amp_Q0[ 1 ], silk_max( mid_res_amp_Q0[ 0 ], 1 ), 14 );
+    *ratio_Q14 = silk_LIMIT( *ratio_Q14, 0, 32767 );
+
+    return pred_Q13;
+}
diff --git a/drivers/opus/silk/stereo_quant_pred.c b/drivers/opus/silk/stereo_quant_pred.c
new file mode 100644
index 0000000000..834020d715
--- /dev/null
+++ b/drivers/opus/silk/stereo_quant_pred.c
@@ -0,0 +1,73 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Quantize mid/side predictors */
+void silk_stereo_quant_pred(
+    opus_int32                  pred_Q13[],                     /* I/O  Predictors (out: quantized)                 */
+    opus_int8                   ix[ 2 ][ 3 ]                    /* O    Quantization indices                        */
+)
+{
+    opus_int   i, j, n;
+    opus_int32 low_Q13, step_Q13, lvl_Q13, err_min_Q13, err_Q13, quant_pred_Q13 = 0;
+
+    /* Quantize */
+    for( n = 0; n < 2; n++ ) {
+        /* Brute-force search over quantization levels */
+        err_min_Q13 = silk_int32_MAX;
+        for( i = 0; i < STEREO_QUANT_TAB_SIZE - 1; i++ ) {
+            low_Q13 = silk_stereo_pred_quant_Q13[ i ];
+            step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ i + 1 ] - low_Q13,
+                SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) );
+            for( j = 0; j < STEREO_QUANT_SUB_STEPS; j++ ) {
+                lvl_Q13 = silk_SMLABB( low_Q13, step_Q13, 2 * j + 1 );
+                err_Q13 = silk_abs( pred_Q13[ n ] - lvl_Q13 );
+                if( err_Q13 < err_min_Q13 ) {
+                    err_min_Q13 = err_Q13;
+                    quant_pred_Q13 = lvl_Q13;
+                    ix[ n ][ 0 ] = i;
+                    ix[ n ][ 1 ] = j;
+                } else {
+                    /* Error increasing, so we're past the optimum */
+                    goto done;
+                }
+            }
+        }
+        done:
+        ix[ n ][ 2 ]  = silk_DIV32_16( ix[ n ][ 0 ], 3 );
+        ix[ n ][ 0 ] -= ix[ n ][ 2 ] * 3;
+        pred_Q13[ n ] = quant_pred_Q13;
+    }
+
+    /* Subtract second from first predictor (helps when actually applying these) */
+    pred_Q13[ 0 ] -= pred_Q13[ 1 ];
+}
diff --git a/drivers/opus/silk/structs.h b/drivers/opus/silk/structs.h
new file mode 100644
index 0000000000..1826b36a80
--- /dev/null
+++ b/drivers/opus/silk/structs.h
@@ -0,0 +1,327 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_STRUCTS_H
+#define SILK_STRUCTS_H
+
+#include "typedef.h"
+#include "SigProc_FIX.h"
+#include "define.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/************************************/
+/* Noise shaping quantization state */
+/************************************/
+typedef struct {
+    opus_int16                  xq[           2 * MAX_FRAME_LENGTH ]; /* Buffer for quantized output signal                             */
+    opus_int32                  sLTP_shp_Q14[ 2 * MAX_FRAME_LENGTH ];
+    opus_int32                  sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
+    opus_int32                  sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
+    opus_int32                  sLF_AR_shp_Q14;
+    opus_int                    lagPrev;
+    opus_int                    sLTP_buf_idx;
+    opus_int                    sLTP_shp_buf_idx;
+    opus_int32                  rand_seed;
+    opus_int32                  prev_gain_Q16;
+    opus_int                    rewhite_flag;
+} silk_nsq_state;
+
+/********************************/
+/* VAD state                    */
+/********************************/
+typedef struct {
+    opus_int32                  AnaState[ 2 ];                  /* Analysis filterbank state: 0-8 kHz                                   */
+    opus_int32                  AnaState1[ 2 ];                 /* Analysis filterbank state: 0-4 kHz                                   */
+    opus_int32                  AnaState2[ 2 ];                 /* Analysis filterbank state: 0-2 kHz                                   */
+    opus_int32                  XnrgSubfr[ VAD_N_BANDS ];       /* Subframe energies                                                    */
+    opus_int32                  NrgRatioSmth_Q8[ VAD_N_BANDS ]; /* Smoothed energy level in each band                                   */
+    opus_int16                  HPstate;                        /* State of differentiator in the lowest band                           */
+    opus_int32                  NL[ VAD_N_BANDS ];              /* Noise energy level in each band                                      */
+    opus_int32                  inv_NL[ VAD_N_BANDS ];          /* Inverse noise energy level in each band                              */
+    opus_int32                  NoiseLevelBias[ VAD_N_BANDS ];  /* Noise level estimator bias/offset                                    */
+    opus_int32                  counter;                        /* Frame counter used in the initial phase                              */
+} silk_VAD_state;
+
+/* Variable cut-off low-pass filter state */
+typedef struct {
+    opus_int32                   In_LP_State[ 2 ];           /* Low pass filter state */
+    opus_int32                   transition_frame_no;        /* Counter which is mapped to a cut-off frequency */
+    opus_int                     mode;                       /* Operating mode, <0: switch down, >0: switch up; 0: do nothing           */
+} silk_LP_state;
+
+/* Structure containing NLSF codebook */
+typedef struct {
+    const opus_int16             nVectors;
+    const opus_int16             order;
+    const opus_int16             quantStepSize_Q16;
+    const opus_int16             invQuantStepSize_Q6;
+    const opus_uint8             *CB1_NLSF_Q8;
+    const opus_uint8             *CB1_iCDF;
+    const opus_uint8             *pred_Q8;
+    const opus_uint8             *ec_sel;
+    const opus_uint8             *ec_iCDF;
+    const opus_uint8             *ec_Rates_Q5;
+    const opus_int16             *deltaMin_Q15;
+} silk_NLSF_CB_struct;
+
+typedef struct {
+    opus_int16                   pred_prev_Q13[ 2 ];
+    opus_int16                   sMid[ 2 ];
+    opus_int16                   sSide[ 2 ];
+    opus_int32                   mid_side_amp_Q0[ 4 ];
+    opus_int16                   smth_width_Q14;
+    opus_int16                   width_prev_Q14;
+    opus_int16                   silent_side_len;
+    opus_int8                    predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ];
+    opus_int8                    mid_only_flags[ MAX_FRAMES_PER_PACKET ];
+} stereo_enc_state;
+
+typedef struct {
+    opus_int16                   pred_prev_Q13[ 2 ];
+    opus_int16                   sMid[ 2 ];
+    opus_int16                   sSide[ 2 ];
+} stereo_dec_state;
+
+typedef struct {
+    opus_int8                    GainsIndices[ MAX_NB_SUBFR ];
+    opus_int8                    LTPIndex[ MAX_NB_SUBFR ];
+    opus_int8                    NLSFIndices[ MAX_LPC_ORDER + 1 ];
+    opus_int16                   lagIndex;
+    opus_int8                    contourIndex;
+    opus_int8                    signalType;
+    opus_int8                    quantOffsetType;
+    opus_int8                    NLSFInterpCoef_Q2;
+    opus_int8                    PERIndex;
+    opus_int8                    LTP_scaleIndex;
+    opus_int8                    Seed;
+} SideInfoIndices;
+
+/********************************/
+/* Encoder state                */
+/********************************/
+typedef struct {
+    opus_int32                   In_HP_State[ 2 ];                  /* High pass filter state                                           */
+    opus_int32                   variable_HP_smth1_Q15;             /* State of first smoother                                          */
+    opus_int32                   variable_HP_smth2_Q15;             /* State of second smoother                                         */
+    silk_LP_state                sLP;                               /* Low pass filter state                                            */
+    silk_VAD_state               sVAD;                              /* Voice activity detector state                                    */
+    silk_nsq_state               sNSQ;                              /* Noise Shape Quantizer State                                      */
+    opus_int16                   prev_NLSFq_Q15[ MAX_LPC_ORDER ];   /* Previously quantized NLSF vector                                 */
+    opus_int                     speech_activity_Q8;                /* Speech activity                                                  */
+    opus_int                     allow_bandwidth_switch;            /* Flag indicating that switching of internal bandwidth is allowed  */
+    opus_int8                    LBRRprevLastGainIndex;
+    opus_int8                    prevSignalType;
+    opus_int                     prevLag;
+    opus_int                     pitch_LPC_win_length;
+    opus_int                     max_pitch_lag;                     /* Highest possible pitch lag (samples)                             */
+    opus_int32                   API_fs_Hz;                         /* API sampling frequency (Hz)                                      */
+    opus_int32                   prev_API_fs_Hz;                    /* Previous API sampling frequency (Hz)                             */
+    opus_int                     maxInternal_fs_Hz;                 /* Maximum internal sampling frequency (Hz)                         */
+    opus_int                     minInternal_fs_Hz;                 /* Minimum internal sampling frequency (Hz)                         */
+    opus_int                     desiredInternal_fs_Hz;             /* Soft request for internal sampling frequency (Hz)                */
+    opus_int                     fs_kHz;                            /* Internal sampling frequency (kHz)                                */
+    opus_int                     nb_subfr;                          /* Number of 5 ms subframes in a frame                              */
+    opus_int                     frame_length;                      /* Frame length (samples)                                           */
+    opus_int                     subfr_length;                      /* Subframe length (samples)                                        */
+    opus_int                     ltp_mem_length;                    /* Length of LTP memory                                             */
+    opus_int                     la_pitch;                          /* Look-ahead for pitch analysis (samples)                          */
+    opus_int                     la_shape;                          /* Look-ahead for noise shape analysis (samples)                    */
+    opus_int                     shapeWinLength;                    /* Window length for noise shape analysis (samples)                 */
+    opus_int32                   TargetRate_bps;                    /* Target bitrate (bps)                                             */
+    opus_int                     PacketSize_ms;                     /* Number of milliseconds to put in each packet                     */
+    opus_int                     PacketLoss_perc;                   /* Packet loss rate measured by farend                              */
+    opus_int32                   frameCounter;
+    opus_int                     Complexity;                        /* Complexity setting                                               */
+    opus_int                     nStatesDelayedDecision;            /* Number of states in delayed decision quantization                */
+    opus_int                     useInterpolatedNLSFs;              /* Flag for using NLSF interpolation                                */
+    opus_int                     shapingLPCOrder;                   /* Filter order for noise shaping filters                           */
+    opus_int                     predictLPCOrder;                   /* Filter order for prediction filters                              */
+    opus_int                     pitchEstimationComplexity;         /* Complexity level for pitch estimator                             */
+    opus_int                     pitchEstimationLPCOrder;           /* Whitening filter order for pitch estimator                       */
+    opus_int32                   pitchEstimationThreshold_Q16;      /* Threshold for pitch estimator                                    */
+    opus_int                     LTPQuantLowComplexity;             /* Flag for low complexity LTP quantization                         */
+    opus_int                     mu_LTP_Q9;                         /* Rate-distortion tradeoff in LTP quantization                     */
+    opus_int32                   sum_log_gain_Q7;					/* Cumulative max prediction gain									*/
+    opus_int                     NLSF_MSVQ_Survivors;               /* Number of survivors in NLSF MSVQ                                 */
+    opus_int                     first_frame_after_reset;           /* Flag for deactivating NLSF interpolation, pitch prediction       */
+    opus_int                     controlled_since_last_payload;     /* Flag for ensuring codec_control only runs once per packet        */
+    opus_int                     warping_Q16;                       /* Warping parameter for warped noise shaping                       */
+    opus_int                     useCBR;                            /* Flag to enable constant bitrate                                  */
+    opus_int                     prefillFlag;                       /* Flag to indicate that only buffers are prefilled, no coding      */
+    const opus_uint8             *pitch_lag_low_bits_iCDF;          /* Pointer to iCDF table for low bits of pitch lag index            */
+    const opus_uint8             *pitch_contour_iCDF;               /* Pointer to iCDF table for pitch contour index                    */
+    const silk_NLSF_CB_struct    *psNLSF_CB;                        /* Pointer to NLSF codebook                                         */
+    opus_int                     input_quality_bands_Q15[ VAD_N_BANDS ];
+    opus_int                     input_tilt_Q15;
+    opus_int                     SNR_dB_Q7;                         /* Quality setting                                                  */
+
+    opus_int8                    VAD_flags[ MAX_FRAMES_PER_PACKET ];
+    opus_int8                    LBRR_flag;
+    opus_int                     LBRR_flags[ MAX_FRAMES_PER_PACKET ];
+
+    SideInfoIndices              indices;
+    opus_int8                    pulses[ MAX_FRAME_LENGTH ];
+
+    int                          arch;
+
+    /* Input/output buffering */
+    opus_int16                   inputBuf[ MAX_FRAME_LENGTH + 2 ];  /* Buffer containing input signal                                   */
+    opus_int                     inputBufIx;
+    opus_int                     nFramesPerPacket;
+    opus_int                     nFramesEncoded;                    /* Number of frames analyzed in current packet                      */
+
+    opus_int                     nChannelsAPI;
+    opus_int                     nChannelsInternal;
+    opus_int                     channelNb;
+
+    /* Parameters For LTP scaling Control */
+    opus_int                     frames_since_onset;
+
+    /* Specifically for entropy coding */
+    opus_int                     ec_prevSignalType;
+    opus_int16                   ec_prevLagIndex;
+
+    silk_resampler_state_struct resampler_state;
+
+    /* DTX */
+    opus_int                     useDTX;                            /* Flag to enable DTX                                               */
+    opus_int                     inDTX;                             /* Flag to signal DTX period                                        */
+    opus_int                     noSpeechCounter;                   /* Counts concecutive nonactive frames, used by DTX                 */
+
+    /* Inband Low Bitrate Redundancy (LBRR) data */
+    opus_int                     useInBandFEC;                      /* Saves the API setting for query                                  */
+    opus_int                     LBRR_enabled;                      /* Depends on useInBandFRC, bitrate and packet loss rate            */
+    opus_int                     LBRR_GainIncreases;                /* Gains increment for coding LBRR frames                           */
+    SideInfoIndices              indices_LBRR[ MAX_FRAMES_PER_PACKET ];
+    opus_int8                    pulses_LBRR[ MAX_FRAMES_PER_PACKET ][ MAX_FRAME_LENGTH ];
+} silk_encoder_state;
+
+
+/* Struct for Packet Loss Concealment */
+typedef struct {
+    opus_int32                  pitchL_Q8;                          /* Pitch lag to use for voiced concealment                          */
+    opus_int16                  LTPCoef_Q14[ LTP_ORDER ];           /* LTP coeficients to use for voiced concealment                    */
+    opus_int16                  prevLPC_Q12[ MAX_LPC_ORDER ];
+    opus_int                    last_frame_lost;                    /* Was previous frame lost                                          */
+    opus_int32                  rand_seed;                          /* Seed for unvoiced signal generation                              */
+    opus_int16                  randScale_Q14;                      /* Scaling of unvoiced random signal                                */
+    opus_int32                  conc_energy;
+    opus_int                    conc_energy_shift;
+    opus_int16                  prevLTP_scale_Q14;
+    opus_int32                  prevGain_Q16[ 2 ];
+    opus_int                    fs_kHz;
+    opus_int                    nb_subfr;
+    opus_int                    subfr_length;
+} silk_PLC_struct;
+
+/* Struct for CNG */
+typedef struct {
+    opus_int32                  CNG_exc_buf_Q14[ MAX_FRAME_LENGTH ];
+    opus_int16                  CNG_smth_NLSF_Q15[ MAX_LPC_ORDER ];
+    opus_int32                  CNG_synth_state[ MAX_LPC_ORDER ];
+    opus_int32                  CNG_smth_Gain_Q16;
+    opus_int32                  rand_seed;
+    opus_int                    fs_kHz;
+} silk_CNG_struct;
+
+/********************************/
+/* Decoder state                */
+/********************************/
+typedef struct {
+    opus_int32                  prev_gain_Q16;
+    opus_int32                  exc_Q14[ MAX_FRAME_LENGTH ];
+    opus_int32                  sLPC_Q14_buf[ MAX_LPC_ORDER ];
+    opus_int16                  outBuf[ MAX_FRAME_LENGTH + 2 * MAX_SUB_FRAME_LENGTH ];  /* Buffer for output signal                     */
+    opus_int                    lagPrev;                            /* Previous Lag                                                     */
+    opus_int8                   LastGainIndex;                      /* Previous gain index                                              */
+    opus_int                    fs_kHz;                             /* Sampling frequency in kHz                                        */
+    opus_int32                  fs_API_hz;                          /* API sample frequency (Hz)                                        */
+    opus_int                    nb_subfr;                           /* Number of 5 ms subframes in a frame                              */
+    opus_int                    frame_length;                       /* Frame length (samples)                                           */
+    opus_int                    subfr_length;                       /* Subframe length (samples)                                        */
+    opus_int                    ltp_mem_length;                     /* Length of LTP memory                                             */
+    opus_int                    LPC_order;                          /* LPC order                                                        */
+    opus_int16                  prevNLSF_Q15[ MAX_LPC_ORDER ];      /* Used to interpolate LSFs                                         */
+    opus_int                    first_frame_after_reset;            /* Flag for deactivating NLSF interpolation                         */
+    const opus_uint8            *pitch_lag_low_bits_iCDF;           /* Pointer to iCDF table for low bits of pitch lag index            */
+    const opus_uint8            *pitch_contour_iCDF;                /* Pointer to iCDF table for pitch contour index                    */
+
+    /* For buffering payload in case of more frames per packet */
+    opus_int                    nFramesDecoded;
+    opus_int                    nFramesPerPacket;
+
+    /* Specifically for entropy coding */
+    opus_int                    ec_prevSignalType;
+    opus_int16                  ec_prevLagIndex;
+
+    opus_int                    VAD_flags[ MAX_FRAMES_PER_PACKET ];
+    opus_int                    LBRR_flag;
+    opus_int                    LBRR_flags[ MAX_FRAMES_PER_PACKET ];
+
+    silk_resampler_state_struct resampler_state;
+
+    const silk_NLSF_CB_struct   *psNLSF_CB;                         /* Pointer to NLSF codebook                                         */
+
+    /* Quantization indices */
+    SideInfoIndices             indices;
+
+    /* CNG state */
+    silk_CNG_struct             sCNG;
+
+    /* Stuff used for PLC */
+    opus_int                    lossCnt;
+    opus_int                    prevSignalType;
+
+    silk_PLC_struct sPLC;
+
+} silk_decoder_state;
+
+/************************/
+/* Decoder control      */
+/************************/
+typedef struct {
+    /* Prediction and coding parameters */
+    opus_int                    pitchL[ MAX_NB_SUBFR ];
+    opus_int32                  Gains_Q16[ MAX_NB_SUBFR ];
+    /* Holds interpolated and final coefficients, 4-byte aligned */
+    silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ];
+    opus_int16                  LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ];
+    opus_int                    LTP_scale_Q14;
+} silk_decoder_control;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/sum_sqr_shift.c b/drivers/opus/silk/sum_sqr_shift.c
new file mode 100644
index 0000000000..8ec27f8a03
--- /dev/null
+++ b/drivers/opus/silk/sum_sqr_shift.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+/* Compute number of bits to right shift the sum of squares of a vector */
+/* of int16s to make it fit in an int32                                 */
+void silk_sum_sqr_shift(
+    opus_int32                  *energy,            /* O   Energy of x, after shifting to the right                     */
+    opus_int                    *shift,             /* O   Number of bits right shift applied to energy                 */
+    const opus_int16            *x,                 /* I   Input vector                                                 */
+    opus_int                    len                 /* I   Length of input vector                                       */
+)
+{
+    opus_int   i, shft;
+    opus_int32 nrg_tmp, nrg;
+
+    nrg  = 0;
+    shft = 0;
+    len--;
+    for( i = 0; i < len; i += 2 ) {
+        nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] );
+        nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] );
+        if( nrg < 0 ) {
+            /* Scale down */
+            nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+            shft = 2;
+            break;
+        }
+    }
+    for( ; i < len; i += 2 ) {
+        nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
+        nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] );
+        nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft );
+        if( nrg < 0 ) {
+            /* Scale down */
+            nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+            shft += 2;
+        }
+    }
+    if( i == len ) {
+        /* One sample left to process */
+        nrg_tmp = silk_SMULBB( x[ i ], x[ i ] );
+        nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft );
+    }
+
+    /* Make sure to have at least one extra leading zero (two leading zeros in total) */
+    if( nrg & 0xC0000000 ) {
+        nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
+        shft += 2;
+    }
+
+    /* Output arguments */
+    *shift  = shft;
+    *energy = nrg;
+}
+
diff --git a/drivers/opus/silk/table_LSF_cos.c b/drivers/opus/silk/table_LSF_cos.c
new file mode 100644
index 0000000000..674b6a03e6
--- /dev/null
+++ b/drivers/opus/silk/table_LSF_cos.c
@@ -0,0 +1,70 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+/* Cosine approximation table for LSF conversion */
+/* Q12 values (even) */
+const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ] = {
+            8192,             8190,             8182,             8170,
+            8152,             8130,             8104,             8072,
+            8034,             7994,             7946,             7896,
+            7840,             7778,             7714,             7644,
+            7568,             7490,             7406,             7318,
+            7226,             7128,             7026,             6922,
+            6812,             6698,             6580,             6458,
+            6332,             6204,             6070,             5934,
+            5792,             5648,             5502,             5352,
+            5198,             5040,             4880,             4718,
+            4552,             4382,             4212,             4038,
+            3862,             3684,             3502,             3320,
+            3136,             2948,             2760,             2570,
+            2378,             2186,             1990,             1794,
+            1598,             1400,             1202,             1002,
+             802,              602,              402,              202,
+               0,             -202,             -402,             -602,
+            -802,            -1002,            -1202,            -1400,
+           -1598,            -1794,            -1990,            -2186,
+           -2378,            -2570,            -2760,            -2948,
+           -3136,            -3320,            -3502,            -3684,
+           -3862,            -4038,            -4212,            -4382,
+           -4552,            -4718,            -4880,            -5040,
+           -5198,            -5352,            -5502,            -5648,
+           -5792,            -5934,            -6070,            -6204,
+           -6332,            -6458,            -6580,            -6698,
+           -6812,            -6922,            -7026,            -7128,
+           -7226,            -7318,            -7406,            -7490,
+           -7568,            -7644,            -7714,            -7778,
+           -7840,            -7896,            -7946,            -7994,
+           -8034,            -8072,            -8104,            -8130,
+           -8152,            -8170,            -8182,            -8190,
+           -8192
+};
diff --git a/drivers/opus/silk/tables.h b/drivers/opus/silk/tables.h
new file mode 100644
index 0000000000..a91431e854
--- /dev/null
+++ b/drivers/opus/silk/tables.h
@@ -0,0 +1,122 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TABLES_H
+#define SILK_TABLES_H
+
+#include "define.h"
+#include "structs.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Entropy coding tables (with size in bytes indicated) */
+extern const opus_uint8  silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ];                                 /* 24 */
+extern const opus_uint8  silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ];   /* 41 */
+
+extern const opus_uint8  silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ];/* 32 */
+extern const opus_uint8  silk_pitch_delta_iCDF[ 21 ];                                               /*  21 */
+extern const opus_uint8  silk_pitch_contour_iCDF[ 34 ];                                             /*  34 */
+extern const opus_uint8  silk_pitch_contour_NB_iCDF[ 11 ];                                          /*  11 */
+extern const opus_uint8  silk_pitch_contour_10_ms_iCDF[ 12 ];                                       /*  12 */
+extern const opus_uint8  silk_pitch_contour_10_ms_NB_iCDF[ 3 ];                                     /*   3 */
+
+extern const opus_uint8  silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ MAX_PULSES + 2 ];             /* 180 */
+extern const opus_uint8  silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ MAX_PULSES + 2 ];      /* 162 */
+
+extern const opus_uint8  silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ];                           /*  18 */
+extern const opus_uint8  silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ];                        /*  18 */
+
+extern const opus_uint8  silk_max_pulses_table[ 4 ];                                                /*   4 */
+
+extern const opus_uint8  silk_shell_code_table0[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table1[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table2[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table3[ 152 ];                                             /* 152 */
+extern const opus_uint8  silk_shell_code_table_offsets[ MAX_PULSES + 1 ];                           /*  17 */
+
+extern const opus_uint8  silk_lsb_iCDF[ 2 ];                                                        /*   2 */
+
+extern const opus_uint8  silk_sign_iCDF[ 42 ];                                                      /*  42 */
+
+extern const opus_uint8  silk_uniform3_iCDF[ 3 ];                                                   /*   3 */
+extern const opus_uint8  silk_uniform4_iCDF[ 4 ];                                                   /*   4 */
+extern const opus_uint8  silk_uniform5_iCDF[ 5 ];                                                   /*   5 */
+extern const opus_uint8  silk_uniform6_iCDF[ 6 ];                                                   /*   6 */
+extern const opus_uint8  silk_uniform8_iCDF[ 8 ];                                                   /*   8 */
+
+extern const opus_uint8  silk_NLSF_EXT_iCDF[ 7 ];                                                   /*   7 */
+
+extern const opus_uint8  silk_LTP_per_index_iCDF[ 3 ];                                              /*   3 */
+extern const opus_uint8  * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ];                            /*   3 */
+extern const opus_uint8  * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ];                         /*   3 */
+extern const opus_int16  silk_LTP_gain_middle_avg_RD_Q14;
+extern const opus_int8   * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ];                                /* 168 */
+extern const opus_uint8  * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS];
+
+extern const opus_int8   silk_LTP_vq_sizes[ NB_LTP_CBKS ];                                          /*   3 */
+
+extern const opus_uint8  silk_LTPscale_iCDF[ 3 ];                                                   /*   4 */
+extern const opus_int16  silk_LTPScales_table_Q14[ 3 ];                                             /*   6 */
+
+extern const opus_uint8  silk_type_offset_VAD_iCDF[ 4 ];                                            /*   4 */
+extern const opus_uint8  silk_type_offset_no_VAD_iCDF[ 2 ];                                         /*   2 */
+
+extern const opus_int16  silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ];                       /*  32 */
+extern const opus_uint8  silk_stereo_pred_joint_iCDF[ 25 ];                                         /*  25 */
+extern const opus_uint8  silk_stereo_only_code_mid_iCDF[ 2 ];                                       /*   2 */
+
+extern const opus_uint8  * const silk_LBRR_flags_iCDF_ptr[ 2 ];                                     /*  10 */
+
+extern const opus_uint8  silk_NLSF_interpolation_factor_iCDF[ 5 ];                                  /*   5 */
+
+extern const silk_NLSF_CB_struct silk_NLSF_CB_WB;                                                   /* 1040 */
+extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB;                                                /* 728 */
+
+/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */
+extern const opus_int32  silk_TargetRate_table_NB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
+extern const opus_int32  silk_TargetRate_table_MB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
+extern const opus_int32  silk_TargetRate_table_WB[  TARGET_RATE_TAB_SZ ];                           /*  32 */
+extern const opus_int16  silk_SNR_table_Q1[         TARGET_RATE_TAB_SZ ];                           /*  32 */
+
+/* Quantization offsets */
+extern const opus_int16  silk_Quantization_Offsets_Q10[ 2 ][ 2 ];                                   /*   8 */
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+extern const opus_int32  silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ];           /*  60 */
+extern const opus_int32  silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ];           /*  60 */
+
+/* Rom table with cosine values */
+extern const opus_int16  silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ];                          /* 258 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/drivers/opus/silk/tables_LTP.c b/drivers/opus/silk/tables_LTP.c
new file mode 100644
index 0000000000..56b672db8b
--- /dev/null
+++ b/drivers/opus/silk/tables_LTP.c
@@ -0,0 +1,296 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_LTP_per_index_iCDF[3] = {
+       179,     99,      0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_0[8] = {
+        71,     56,     43,     30,     21,     12,      6,      0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_1[16] = {
+       199,    165,    144,    124,    109,     96,     84,     71,
+        61,     51,     42,     32,     23,     15,      8,      0
+};
+
+static const opus_uint8 silk_LTP_gain_iCDF_2[32] = {
+       241,    225,    211,    199,    187,    175,    164,    153,
+       142,    132,    123,    114,    105,     96,     88,     80,
+        72,     64,     57,     50,     44,     38,     33,     29,
+        24,     20,     16,     12,      9,      5,      2,      0
+};
+
+const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304;
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = {
+        15,    131,    138,    138,    155,    155,    173,    173
+};
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_1[16] = {
+        69,     93,    115,    118,    131,    138,    141,    138,
+       150,    150,    155,    150,    155,    160,    166,    160
+};
+
+static const opus_uint8 silk_LTP_gain_BITS_Q5_2[32] = {
+       131,    128,    134,    141,    141,    141,    145,    145,
+       145,    150,    155,    155,    155,    155,    160,    160,
+       160,    160,    166,    166,    173,    173,    182,    192,
+       182,    192,    192,    192,    205,    192,    205,    224
+};
+
+const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[NB_LTP_CBKS] = {
+    silk_LTP_gain_iCDF_0,
+    silk_LTP_gain_iCDF_1,
+    silk_LTP_gain_iCDF_2
+};
+
+const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[NB_LTP_CBKS] = {
+    silk_LTP_gain_BITS_Q5_0,
+    silk_LTP_gain_BITS_Q5_1,
+    silk_LTP_gain_BITS_Q5_2
+};
+
+static const opus_int8 silk_LTP_gain_vq_0[8][5] =
+{
+{
+         4,      6,     24,      7,      5
+},
+{
+         0,      0,      2,      0,      0
+},
+{
+        12,     28,     41,     13,     -4
+},
+{
+        -9,     15,     42,     25,     14
+},
+{
+         1,     -2,     62,     41,     -9
+},
+{
+       -10,     37,     65,     -4,      3
+},
+{
+        -6,      4,     66,      7,     -8
+},
+{
+        16,     14,     38,     -3,     33
+}
+};
+
+static const opus_int8 silk_LTP_gain_vq_1[16][5] =
+{
+{
+        13,     22,     39,     23,     12
+},
+{
+        -1,     36,     64,     27,     -6
+},
+{
+        -7,     10,     55,     43,     17
+},
+{
+         1,      1,      8,      1,      1
+},
+{
+         6,    -11,     74,     53,     -9
+},
+{
+       -12,     55,     76,    -12,      8
+},
+{
+        -3,      3,     93,     27,     -4
+},
+{
+        26,     39,     59,      3,     -8
+},
+{
+         2,      0,     77,     11,      9
+},
+{
+        -8,     22,     44,     -6,      7
+},
+{
+        40,      9,     26,      3,      9
+},
+{
+        -7,     20,    101,     -7,      4
+},
+{
+         3,     -8,     42,     26,      0
+},
+{
+       -15,     33,     68,      2,     23
+},
+{
+        -2,     55,     46,     -2,     15
+},
+{
+         3,     -1,     21,     16,     41
+}
+};
+
+static const opus_int8 silk_LTP_gain_vq_2[32][5] =
+{
+{
+        -6,     27,     61,     39,      5
+},
+{
+       -11,     42,     88,      4,      1
+},
+{
+        -2,     60,     65,      6,     -4
+},
+{
+        -1,     -5,     73,     56,      1
+},
+{
+        -9,     19,     94,     29,     -9
+},
+{
+         0,     12,     99,      6,      4
+},
+{
+         8,    -19,    102,     46,    -13
+},
+{
+         3,      2,     13,      3,      2
+},
+{
+         9,    -21,     84,     72,    -18
+},
+{
+       -11,     46,    104,    -22,      8
+},
+{
+        18,     38,     48,     23,      0
+},
+{
+       -16,     70,     83,    -21,     11
+},
+{
+         5,    -11,    117,     22,     -8
+},
+{
+        -6,     23,    117,    -12,      3
+},
+{
+         3,     -8,     95,     28,      4
+},
+{
+       -10,     15,     77,     60,    -15
+},
+{
+        -1,      4,    124,      2,     -4
+},
+{
+         3,     38,     84,     24,    -25
+},
+{
+         2,     13,     42,     13,     31
+},
+{
+        21,     -4,     56,     46,     -1
+},
+{
+        -1,     35,     79,    -13,     19
+},
+{
+        -7,     65,     88,     -9,    -14
+},
+{
+        20,      4,     81,     49,    -29
+},
+{
+        20,      0,     75,      3,    -17
+},
+{
+         5,     -9,     44,     92,     -8
+},
+{
+         1,     -3,     22,     69,     31
+},
+{
+        -6,     95,     41,    -12,      5
+},
+{
+        39,     67,     16,     -4,      1
+},
+{
+         0,     -6,    120,     55,    -36
+},
+{
+       -13,     44,    122,      4,    -24
+},
+{
+        81,      5,     11,      3,      7
+},
+{
+         2,      0,      9,     10,     88
+}
+};
+
+const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = {
+    (opus_int8 *)&silk_LTP_gain_vq_0[0][0],
+    (opus_int8 *)&silk_LTP_gain_vq_1[0][0],
+    (opus_int8 *)&silk_LTP_gain_vq_2[0][0]
+};
+
+/* Maximum frequency-dependent response of the pitch taps above,
+   computed as max(abs(freqz(taps))) */
+static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = {
+      46,      2,     90,     87,     93,     91,     82,     98
+};
+
+static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = {
+     109,    120,    118,     12,    113,    115,    117,    119,
+      99,     59,     87,    111,     63,    111,    112,     80
+};
+
+static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = {
+     126,    124,    125,    124,    129,    121,    126,     23,
+     132,    127,    127,    127,    126,    127,    122,    133,
+     130,    134,    101,    118,    119,    145,    126,     86,
+     124,    120,    123,    119,    170,    173,    107,    109
+};
+
+const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = {
+    &silk_LTP_gain_vq_0_gain[0],
+    &silk_LTP_gain_vq_1_gain[0],
+    &silk_LTP_gain_vq_2_gain[0]
+};
+
+const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = {
+    8, 16, 32
+};
diff --git a/drivers/opus/silk/tables_NLSF_CB_NB_MB.c b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c
new file mode 100644
index 0000000000..ded35eee74
--- /dev/null
+++ b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c
@@ -0,0 +1,159 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = {
+        12,     35,     60,     83,    108,    132,    157,    180,
+       206,    228,     15,     32,     55,     77,    101,    125,
+       151,    175,    201,    225,     19,     42,     66,     89,
+       114,    137,    162,    184,    209,    230,     12,     25,
+        50,     72,     97,    120,    147,    172,    200,    223,
+        26,     44,     69,     90,    114,    135,    159,    180,
+       205,    225,     13,     22,     53,     80,    106,    130,
+       156,    180,    205,    228,     15,     25,     44,     64,
+        90,    115,    142,    168,    196,    222,     19,     24,
+        62,     82,    100,    120,    145,    168,    190,    214,
+        22,     31,     50,     79,    103,    120,    151,    170,
+       203,    227,     21,     29,     45,     65,    106,    124,
+       150,    171,    196,    224,     30,     49,     75,     97,
+       121,    142,    165,    186,    209,    229,     19,     25,
+        52,     70,     93,    116,    143,    166,    192,    219,
+        26,     34,     62,     75,     97,    118,    145,    167,
+       194,    217,     25,     33,     56,     70,     91,    113,
+       143,    165,    196,    223,     21,     34,     51,     72,
+        97,    117,    145,    171,    196,    222,     20,     29,
+        50,     67,     90,    117,    144,    168,    197,    221,
+        22,     31,     48,     66,     95,    117,    146,    168,
+       196,    222,     24,     33,     51,     77,    116,    134,
+       158,    180,    200,    224,     21,     28,     70,     87,
+       106,    124,    149,    170,    194,    217,     26,     33,
+        53,     64,     83,    117,    152,    173,    204,    225,
+        27,     34,     65,     95,    108,    129,    155,    174,
+       210,    225,     20,     26,     72,     99,    113,    131,
+       154,    176,    200,    219,     34,     43,     61,     78,
+        93,    114,    155,    177,    205,    229,     23,     29,
+        54,     97,    124,    138,    163,    179,    209,    229,
+        30,     38,     56,     89,    118,    129,    158,    178,
+       200,    231,     21,     29,     49,     63,     85,    111,
+       142,    163,    193,    222,     27,     48,     77,    103,
+       133,    158,    179,    196,    215,    232,     29,     47,
+        74,     99,    124,    151,    176,    198,    220,    237,
+        33,     42,     61,     76,     93,    121,    155,    174,
+       207,    225,     29,     53,     87,    112,    136,    154,
+       170,    188,    208,    227,     24,     30,     52,     84,
+       131,    150,    166,    186,    203,    229,     37,     48,
+        64,     84,    104,    118,    156,    177,    201,    230
+};
+
+static const opus_uint8 silk_NLSF_CB1_iCDF_NB_MB[ 64 ] = {
+       212,    178,    148,    129,    108,     96,     85,     82,
+        79,     77,     61,     59,     57,     56,     51,     49,
+        48,     45,     42,     41,     40,     38,     36,     34,
+        31,     30,     21,     12,     10,      3,      1,      0,
+       255,    245,    244,    236,    233,    225,    217,    203,
+       190,    176,    175,    161,    149,    136,    125,    114,
+       102,     91,     81,     71,     60,     52,     43,     35,
+        28,     20,     19,     18,     12,     11,      5,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_SELECT_NB_MB[ 160 ] = {
+        16,      0,      0,      0,      0,     99,     66,     36,
+        36,     34,     36,     34,     34,     34,     34,     83,
+        69,     36,     52,     34,    116,    102,     70,     68,
+        68,    176,    102,     68,     68,     34,     65,     85,
+        68,     84,     36,    116,    141,    152,    139,    170,
+       132,    187,    184,    216,    137,    132,    249,    168,
+       185,    139,    104,    102,    100,     68,     68,    178,
+       218,    185,    185,    170,    244,    216,    187,    187,
+       170,    244,    187,    187,    219,    138,    103,    155,
+       184,    185,    137,    116,    183,    155,    152,    136,
+       132,    217,    184,    184,    170,    164,    217,    171,
+       155,    139,    244,    169,    184,    185,    170,    164,
+       216,    223,    218,    138,    214,    143,    188,    218,
+       168,    244,    141,    136,    155,    170,    168,    138,
+       220,    219,    139,    164,    219,    202,    216,    137,
+       168,    186,    246,    185,    139,    116,    185,    219,
+       185,    138,    100,    100,    134,    100,    102,     34,
+        68,     68,    100,     68,    168,    203,    221,    218,
+       168,    167,    154,    136,    104,     70,    164,    246,
+       171,    137,    139,    137,    155,    218,    219,    139
+};
+
+static const opus_uint8 silk_NLSF_CB2_iCDF_NB_MB[ 72 ] = {
+       255,    254,    253,    238,     14,      3,      2,      1,
+         0,    255,    254,    252,    218,     35,      3,      2,
+         1,      0,    255,    254,    250,    208,     59,      4,
+         2,      1,      0,    255,    254,    246,    194,     71,
+        10,      2,      1,      0,    255,    252,    236,    183,
+        82,      8,      2,      1,      0,    255,    252,    235,
+       180,     90,     17,      2,      1,      0,    255,    248,
+       224,    171,     97,     30,      4,      1,      0,    255,
+       254,    236,    173,     95,     37,      7,      1,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_BITS_NB_MB_Q5[ 72 ] = {
+       255,    255,    255,    131,      6,    145,    255,    255,
+       255,    255,    255,    236,     93,     15,     96,    255,
+       255,    255,    255,    255,    194,     83,     25,     71,
+       221,    255,    255,    255,    255,    162,     73,     34,
+        66,    162,    255,    255,    255,    210,    126,     73,
+        43,     57,    173,    255,    255,    255,    201,    125,
+        71,     48,     58,    130,    255,    255,    255,    166,
+       110,     73,     57,     62,    104,    210,    255,    255,
+       251,    123,     65,     55,     68,    100,    171,    255
+};
+
+static const opus_uint8 silk_NLSF_PRED_NB_MB_Q8[ 18 ] = {
+       179,    138,    140,    148,    151,    149,    153,    151,
+       163,    116,     67,     82,     59,     92,     72,    100,
+        89,     92
+};
+
+static const opus_int16 silk_NLSF_DELTA_MIN_NB_MB_Q15[ 11 ] = {
+       250,      3,      6,      3,      3,      3,      4,      3,
+         3,      3,    461
+};
+
+const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB =
+{
+    32,
+    10,
+    SILK_FIX_CONST( 0.18, 16 ),
+    SILK_FIX_CONST( 1.0 / 0.18, 6 ),
+    silk_NLSF_CB1_NB_MB_Q8,
+    silk_NLSF_CB1_iCDF_NB_MB,
+    silk_NLSF_PRED_NB_MB_Q8,
+    silk_NLSF_CB2_SELECT_NB_MB,
+    silk_NLSF_CB2_iCDF_NB_MB,
+    silk_NLSF_CB2_BITS_NB_MB_Q5,
+    silk_NLSF_DELTA_MIN_NB_MB_Q15,
+};
diff --git a/drivers/opus/silk/tables_NLSF_CB_WB.c b/drivers/opus/silk/tables_NLSF_CB_WB.c
new file mode 100644
index 0000000000..d83567ea6f
--- /dev/null
+++ b/drivers/opus/silk/tables_NLSF_CB_WB.c
@@ -0,0 +1,198 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = {
+         7,     23,     38,     54,     69,     85,    100,    116,
+       131,    147,    162,    178,    193,    208,    223,    239,
+        13,     25,     41,     55,     69,     83,     98,    112,
+       127,    142,    157,    171,    187,    203,    220,    236,
+        15,     21,     34,     51,     61,     78,     92,    106,
+       126,    136,    152,    167,    185,    205,    225,    240,
+        10,     21,     36,     50,     63,     79,     95,    110,
+       126,    141,    157,    173,    189,    205,    221,    237,
+        17,     20,     37,     51,     59,     78,     89,    107,
+       123,    134,    150,    164,    184,    205,    224,    240,
+        10,     15,     32,     51,     67,     81,     96,    112,
+       129,    142,    158,    173,    189,    204,    220,    236,
+         8,     21,     37,     51,     65,     79,     98,    113,
+       126,    138,    155,    168,    179,    192,    209,    218,
+        12,     15,     34,     55,     63,     78,     87,    108,
+       118,    131,    148,    167,    185,    203,    219,    236,
+        16,     19,     32,     36,     56,     79,     91,    108,
+       118,    136,    154,    171,    186,    204,    220,    237,
+        11,     28,     43,     58,     74,     89,    105,    120,
+       135,    150,    165,    180,    196,    211,    226,    241,
+         6,     16,     33,     46,     60,     75,     92,    107,
+       123,    137,    156,    169,    185,    199,    214,    225,
+        11,     19,     30,     44,     57,     74,     89,    105,
+       121,    135,    152,    169,    186,    202,    218,    234,
+        12,     19,     29,     46,     57,     71,     88,    100,
+       120,    132,    148,    165,    182,    199,    216,    233,
+        17,     23,     35,     46,     56,     77,     92,    106,
+       123,    134,    152,    167,    185,    204,    222,    237,
+        14,     17,     45,     53,     63,     75,     89,    107,
+       115,    132,    151,    171,    188,    206,    221,    240,
+         9,     16,     29,     40,     56,     71,     88,    103,
+       119,    137,    154,    171,    189,    205,    222,    237,
+        16,     19,     36,     48,     57,     76,     87,    105,
+       118,    132,    150,    167,    185,    202,    218,    236,
+        12,     17,     29,     54,     71,     81,     94,    104,
+       126,    136,    149,    164,    182,    201,    221,    237,
+        15,     28,     47,     62,     79,     97,    115,    129,
+       142,    155,    168,    180,    194,    208,    223,    238,
+         8,     14,     30,     45,     62,     78,     94,    111,
+       127,    143,    159,    175,    192,    207,    223,    239,
+        17,     30,     49,     62,     79,     92,    107,    119,
+       132,    145,    160,    174,    190,    204,    220,    235,
+        14,     19,     36,     45,     61,     76,     91,    108,
+       121,    138,    154,    172,    189,    205,    222,    238,
+        12,     18,     31,     45,     60,     76,     91,    107,
+       123,    138,    154,    171,    187,    204,    221,    236,
+        13,     17,     31,     43,     53,     70,     83,    103,
+       114,    131,    149,    167,    185,    203,    220,    237,
+        17,     22,     35,     42,     58,     78,     93,    110,
+       125,    139,    155,    170,    188,    206,    224,    240,
+         8,     15,     34,     50,     67,     83,     99,    115,
+       131,    146,    162,    178,    193,    209,    224,    239,
+        13,     16,     41,     66,     73,     86,     95,    111,
+       128,    137,    150,    163,    183,    206,    225,    241,
+        17,     25,     37,     52,     63,     75,     92,    102,
+       119,    132,    144,    160,    175,    191,    212,    231,
+        19,     31,     49,     65,     83,    100,    117,    133,
+       147,    161,    174,    187,    200,    213,    227,    242,
+        18,     31,     52,     68,     88,    103,    117,    126,
+       138,    149,    163,    177,    192,    207,    223,    239,
+        16,     29,     47,     61,     76,     90,    106,    119,
+       133,    147,    161,    176,    193,    209,    224,    240,
+        15,     21,     35,     50,     61,     73,     86,     97,
+       110,    119,    129,    141,    175,    198,    218,    237
+};
+
+static const opus_uint8 silk_NLSF_CB1_iCDF_WB[ 64 ] = {
+       225,    204,    201,    184,    183,    175,    158,    154,
+       153,    135,    119,    115,    113,    110,    109,     99,
+        98,     95,     79,     68,     52,     50,     48,     45,
+        43,     32,     31,     27,     18,     10,      3,      0,
+       255,    251,    235,    230,    212,    201,    196,    182,
+       167,    166,    163,    151,    138,    124,    110,    104,
+        90,     78,     76,     70,     69,     57,     45,     34,
+        24,     21,     11,      6,      5,      4,      3,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_SELECT_WB[ 256 ] = {
+         0,      0,      0,      0,      0,      0,      0,      1,
+       100,    102,    102,     68,     68,     36,     34,     96,
+       164,    107,    158,    185,    180,    185,    139,    102,
+        64,     66,     36,     34,     34,      0,      1,     32,
+       208,    139,    141,    191,    152,    185,    155,    104,
+        96,    171,    104,    166,    102,    102,    102,    132,
+         1,      0,      0,      0,      0,     16,     16,      0,
+        80,    109,     78,    107,    185,    139,    103,    101,
+       208,    212,    141,    139,    173,    153,    123,    103,
+        36,      0,      0,      0,      0,      0,      0,      1,
+        48,      0,      0,      0,      0,      0,      0,     32,
+        68,    135,    123,    119,    119,    103,     69,     98,
+        68,    103,    120,    118,    118,    102,     71,     98,
+       134,    136,    157,    184,    182,    153,    139,    134,
+       208,    168,    248,     75,    189,    143,    121,    107,
+        32,     49,     34,     34,     34,      0,     17,      2,
+       210,    235,    139,    123,    185,    137,    105,    134,
+        98,    135,    104,    182,    100,    183,    171,    134,
+       100,     70,     68,     70,     66,     66,     34,    131,
+        64,    166,    102,     68,     36,      2,      1,      0,
+       134,    166,    102,     68,     34,     34,     66,    132,
+       212,    246,    158,    139,    107,    107,     87,    102,
+       100,    219,    125,    122,    137,    118,    103,    132,
+       114,    135,    137,    105,    171,    106,     50,     34,
+       164,    214,    141,    143,    185,    151,    121,    103,
+       192,     34,      0,      0,      0,      0,      0,      1,
+       208,    109,     74,    187,    134,    249,    159,    137,
+       102,    110,    154,    118,     87,    101,    119,    101,
+         0,      2,      0,     36,     36,     66,     68,     35,
+        96,    164,    102,    100,     36,      0,      2,     33,
+       167,    138,    174,    102,    100,     84,      2,      2,
+       100,    107,    120,    119,     36,    197,     24,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_iCDF_WB[ 72 ] = {
+       255,    254,    253,    244,     12,      3,      2,      1,
+         0,    255,    254,    252,    224,     38,      3,      2,
+         1,      0,    255,    254,    251,    209,     57,      4,
+         2,      1,      0,    255,    254,    244,    195,     69,
+         4,      2,      1,      0,    255,    251,    232,    184,
+        84,      7,      2,      1,      0,    255,    254,    240,
+       186,     86,     14,      2,      1,      0,    255,    254,
+       239,    178,     91,     30,      5,      1,      0,    255,
+       248,    227,    177,    100,     19,      2,      1,      0
+};
+
+static const opus_uint8 silk_NLSF_CB2_BITS_WB_Q5[ 72 ] = {
+       255,    255,    255,    156,      4,    154,    255,    255,
+       255,    255,    255,    227,    102,     15,     92,    255,
+       255,    255,    255,    255,    213,     83,     24,     72,
+       236,    255,    255,    255,    255,    150,     76,     33,
+        63,    214,    255,    255,    255,    190,    121,     77,
+        43,     55,    185,    255,    255,    255,    245,    137,
+        71,     43,     59,    139,    255,    255,    255,    255,
+       131,     66,     50,     66,    107,    194,    255,    255,
+       166,    116,     76,     55,     53,    125,    255,    255
+};
+
+static const opus_uint8 silk_NLSF_PRED_WB_Q8[ 30 ] = {
+       175,    148,    160,    176,    178,    173,    174,    164,
+       177,    174,    196,    182,    198,    192,    182,     68,
+        62,     66,     60,     72,    117,     85,     90,    118,
+       136,    151,    142,    160,    142,    155
+};
+
+static const opus_int16 silk_NLSF_DELTA_MIN_WB_Q15[ 17 ] = {
+       100,      3,     40,      3,      3,      3,      5,     14,
+        14,     10,     11,      3,      8,      9,      7,      3,
+       347
+};
+
+const silk_NLSF_CB_struct silk_NLSF_CB_WB =
+{
+    32,
+    16,
+    SILK_FIX_CONST( 0.15, 16 ),
+    SILK_FIX_CONST( 1.0 / 0.15, 6 ),
+    silk_NLSF_CB1_WB_Q8,
+    silk_NLSF_CB1_iCDF_WB,
+    silk_NLSF_PRED_WB_Q8,
+    silk_NLSF_CB2_SELECT_WB,
+    silk_NLSF_CB2_iCDF_WB,
+    silk_NLSF_CB2_BITS_WB_Q5,
+    silk_NLSF_DELTA_MIN_WB_Q15,
+};
+
diff --git a/drivers/opus/silk/tables_gain.c b/drivers/opus/silk/tables_gain.c
new file mode 100644
index 0000000000..6df980616b
--- /dev/null
+++ b/drivers/opus/silk/tables_gain.c
@@ -0,0 +1,63 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ] =
+{
+{
+       224,    112,     44,     15,      3,      2,      1,      0
+},
+{
+       254,    237,    192,    132,     70,     23,      4,      0
+},
+{
+       255,    252,    226,    155,     61,     11,      2,      0
+}
+};
+
+const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ] = {
+       250,    245,    234,    203,     71,     50,     42,     38,
+        35,     33,     31,     29,     28,     27,     26,     25,
+        24,     23,     22,     21,     20,     19,     18,     17,
+        16,     15,     14,     13,     12,     11,     10,      9,
+         8,      7,      6,      5,      4,      3,      2,      1,
+         0
+};
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/drivers/opus/silk/tables_other.c b/drivers/opus/silk/tables_other.c
new file mode 100644
index 0000000000..246e960fa4
--- /dev/null
+++ b/drivers/opus/silk/tables_other.c
@@ -0,0 +1,138 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "structs.h"
+#include "define.h"
+#include "tables.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */
+const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = {
+    0,      8000,   9400,   11500,  13500,  17500,  25000,  MAX_TARGET_RATE_BPS
+};
+const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = {
+    0,      9000,   12000,  14500,  18500,  24500,  35500,  MAX_TARGET_RATE_BPS
+};
+const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = {
+    0,      10500,  14000,  17000,  21500,  28500,  42000,  MAX_TARGET_RATE_BPS
+};
+const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = {
+    18,     29,     38,     40,     46,     52,     62,     84
+};
+
+/* Tables for stereo predictor coding */
+const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = {
+    -13732, -10050, -8266, -7526, -6500, -5000, -2950,  -820,
+       820,   2950,  5000,  6500,  7526,  8266, 10050, 13732
+};
+const opus_uint8  silk_stereo_pred_joint_iCDF[ 25 ] = {
+    249, 247, 246, 245, 244,
+    234, 210, 202, 201, 200,
+    197, 174,  82,  59,  56,
+     55,  54,  46,  22,  12,
+     11,  10,   9,   7,   0
+};
+const opus_uint8  silk_stereo_only_code_mid_iCDF[ 2 ] = { 64, 0 };
+
+/* Tables for LBRR flags */
+static const opus_uint8 silk_LBRR_flags_2_iCDF[ 3 ] = { 203, 150, 0 };
+static const opus_uint8 silk_LBRR_flags_3_iCDF[ 7 ] = { 215, 195, 166, 125, 110, 82, 0 };
+const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ] = {
+    silk_LBRR_flags_2_iCDF,
+    silk_LBRR_flags_3_iCDF
+};
+
+/* Table for LSB coding */
+const opus_uint8 silk_lsb_iCDF[ 2 ] = { 120, 0 };
+
+/* Tables for LTPScale */
+const opus_uint8 silk_LTPscale_iCDF[ 3 ] = { 128, 64, 0 };
+
+/* Tables for signal type and offset coding */
+const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ] = {
+       232,    158,    10,      0
+};
+const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ] = {
+       230,      0
+};
+
+/* Tables for NLSF interpolation factor */
+const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ] = { 243, 221, 192, 181, 0 };
+
+/* Quantization offsets */
+const opus_int16  silk_Quantization_Offsets_Q10[ 2 ][ 2 ] = {
+    { OFFSET_UVL_Q10, OFFSET_UVH_Q10 }, { OFFSET_VL_Q10, OFFSET_VH_Q10 }
+};
+
+/* Table for LTPScale */
+const opus_int16 silk_LTPScales_table_Q14[ 3 ] = { 15565, 12288, 8192 };
+
+/* Uniform entropy tables */
+const opus_uint8 silk_uniform3_iCDF[ 3 ] = { 171, 85, 0 };
+const opus_uint8 silk_uniform4_iCDF[ 4 ] = { 192, 128, 64, 0 };
+const opus_uint8 silk_uniform5_iCDF[ 5 ] = { 205, 154, 102, 51, 0 };
+const opus_uint8 silk_uniform6_iCDF[ 6 ] = { 213, 171, 128, 85, 43, 0 };
+const opus_uint8 silk_uniform8_iCDF[ 8 ] = { 224, 192, 160, 128, 96, 64, 32, 0 };
+
+const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ] = { 100, 40, 16, 7, 3, 1, 0 };
+
+/*  Elliptic/Cauer filters designed with 0.1 dB passband ripple,
+        80 dB minimum stopband attenuation, and
+        [0.95 : 0.15 : 0.35] normalized cut off frequencies. */
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ] =
+{
+{    250767114,  501534038,  250767114  },
+{    209867381,  419732057,  209867381  },
+{    170987846,  341967853,  170987846  },
+{    131531482,  263046905,  131531482  },
+{     89306658,  178584282,   89306658  }
+};
+
+/* Interpolation points for filter coefficients used in the bandwidth transition smoother */
+const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ] =
+{
+{    506393414,  239854379  },
+{    411067935,  169683996  },
+{    306733530,  116694253  },
+{    185807084,   77959395  },
+{     35497197,   57401098  }
+};
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/drivers/opus/silk/tables_pitch_lag.c b/drivers/opus/silk/tables_pitch_lag.c
new file mode 100644
index 0000000000..0af5c5ace7
--- /dev/null
+++ b/drivers/opus/silk/tables_pitch_lag.c
@@ -0,0 +1,69 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ] = {
+       253,    250,    244,    233,    212,    182,    150,    131,
+       120,    110,     98,     85,     72,     60,     49,     40,
+        32,     25,     19,     15,     13,     11,      9,      8,
+         7,      6,      5,      4,      3,      2,      1,      0
+};
+
+const opus_uint8 silk_pitch_delta_iCDF[21] = {
+       210,    208,    206,    203,    199,    193,    183,    168,
+       142,    104,     74,     52,     37,     27,     20,     14,
+        10,      6,      4,      2,      0
+};
+
+const opus_uint8 silk_pitch_contour_iCDF[34] = {
+       223,    201,    183,    167,    152,    138,    124,    111,
+        98,     88,     79,     70,     62,     56,     50,     44,
+        39,     35,     31,     27,     24,     21,     18,     16,
+        14,     12,     10,      8,      6,      4,      3,      2,
+         1,      0
+};
+
+const opus_uint8 silk_pitch_contour_NB_iCDF[11] = {
+       188,    176,    155,    138,    119,     97,     67,     43,
+        26,     10,      0
+};
+
+const opus_uint8 silk_pitch_contour_10_ms_iCDF[12] = {
+       165,    119,     80,     61,     47,     35,     27,     20,
+        14,      9,      4,      0
+};
+
+const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[3] = {
+       113,     63,      0
+};
+
+
diff --git a/drivers/opus/silk/tables_pulses_per_block.c b/drivers/opus/silk/tables_pulses_per_block.c
new file mode 100644
index 0000000000..05ba2318f8
--- /dev/null
+++ b/drivers/opus/silk/tables_pulses_per_block.c
@@ -0,0 +1,264 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "tables.h"
+
+const opus_uint8 silk_max_pulses_table[ 4 ] = {
+         8,     10,     12,     16
+};
+
+const opus_uint8 silk_pulses_per_block_iCDF[ 10 ][ 18 ] = {
+{
+       125,     51,     26,     18,     15,     12,     11,     10,
+         9,      8,      7,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       198,    105,     45,     22,     15,     12,     11,     10,
+         9,      8,      7,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       213,    162,    116,     83,     59,     43,     32,     24,
+        18,     15,     12,      9,      7,      6,      5,      3,
+         2,      0
+},
+{
+       239,    187,    116,     59,     28,     16,     11,     10,
+         9,      8,      7,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       250,    229,    188,    135,     86,     51,     30,     19,
+        13,     10,      8,      6,      5,      4,      3,      2,
+         1,      0
+},
+{
+       249,    235,    213,    185,    156,    128,    103,     83,
+        66,     53,     42,     33,     26,     21,     17,     13,
+        10,      0
+},
+{
+       254,    249,    235,    206,    164,    118,     77,     46,
+        27,     16,     10,      7,      5,      4,      3,      2,
+         1,      0
+},
+{
+       255,    253,    249,    239,    220,    191,    156,    119,
+        85,     57,     37,     23,     15,     10,      6,      4,
+         2,      0
+},
+{
+       255,    253,    251,    246,    237,    223,    203,    179,
+       152,    124,     98,     75,     55,     40,     29,     21,
+        15,      0
+},
+{
+       255,    254,    253,    247,    220,    162,    106,     67,
+        42,     28,     18,     12,      9,      6,      4,      3,
+         2,      0
+}
+};
+
+const opus_uint8 silk_pulses_per_block_BITS_Q5[ 9 ][ 18 ] = {
+{
+        31,     57,    107,    160,    205,    205,    255,    255,
+       255,    255,    255,    255,    255,    255,    255,    255,
+       255,    255
+},
+{
+        69,     47,     67,    111,    166,    205,    255,    255,
+       255,    255,    255,    255,    255,    255,    255,    255,
+       255,    255
+},
+{
+        82,     74,     79,     95,    109,    128,    145,    160,
+       173,    205,    205,    205,    224,    255,    255,    224,
+       255,    224
+},
+{
+       125,     74,     59,     69,     97,    141,    182,    255,
+       255,    255,    255,    255,    255,    255,    255,    255,
+       255,    255
+},
+{
+       173,    115,     85,     73,     76,     92,    115,    145,
+       173,    205,    224,    224,    255,    255,    255,    255,
+       255,    255
+},
+{
+       166,    134,    113,    102,    101,    102,    107,    118,
+       125,    138,    145,    155,    166,    182,    192,    192,
+       205,    150
+},
+{
+       224,    182,    134,    101,     83,     79,     85,     97,
+       120,    145,    173,    205,    224,    255,    255,    255,
+       255,    255
+},
+{
+       255,    224,    192,    150,    120,    101,     92,     89,
+        93,    102,    118,    134,    160,    182,    192,    224,
+       224,    224
+},
+{
+       255,    224,    224,    182,    155,    134,    118,    109,
+       104,    102,    106,    111,    118,    131,    145,    160,
+       173,    131
+}
+};
+
+const opus_uint8 silk_rate_levels_iCDF[ 2 ][ 9 ] =
+{
+{
+       241,    190,    178,    132,     87,     74,     41,     14,
+         0
+},
+{
+       223,    193,    157,    140,    106,     57,     39,     18,
+         0
+}
+};
+
+const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ 9 ] =
+{
+{
+       131,     74,    141,     79,     80,    138,     95,    104,
+       134
+},
+{
+        95,     99,     91,    125,     93,     76,    123,    115,
+       123
+}
+};
+
+const opus_uint8 silk_shell_code_table0[ 152 ] = {
+       128,      0,    214,     42,      0,    235,    128,     21,
+         0,    244,    184,     72,     11,      0,    248,    214,
+       128,     42,      7,      0,    248,    225,    170,     80,
+        25,      5,      0,    251,    236,    198,    126,     54,
+        18,      3,      0,    250,    238,    211,    159,     82,
+        35,     15,      5,      0,    250,    231,    203,    168,
+       128,     88,     53,     25,      6,      0,    252,    238,
+       216,    185,    148,    108,     71,     40,     18,      4,
+         0,    253,    243,    225,    199,    166,    128,     90,
+        57,     31,     13,      3,      0,    254,    246,    233,
+       212,    183,    147,    109,     73,     44,     23,     10,
+         2,      0,    255,    250,    240,    223,    198,    166,
+       128,     90,     58,     33,     16,      6,      1,      0,
+       255,    251,    244,    231,    210,    181,    146,    110,
+        75,     46,     25,     12,      5,      1,      0,    255,
+       253,    248,    238,    221,    196,    164,    128,     92,
+        60,     35,     18,      8,      3,      1,      0,    255,
+       253,    249,    242,    229,    208,    180,    146,    110,
+        76,     48,     27,     14,      7,      3,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table1[ 152 ] = {
+       129,      0,    207,     50,      0,    236,    129,     20,
+         0,    245,    185,     72,     10,      0,    249,    213,
+       129,     42,      6,      0,    250,    226,    169,     87,
+        27,      4,      0,    251,    233,    194,    130,     62,
+        20,      4,      0,    250,    236,    207,    160,     99,
+        47,     17,      3,      0,    255,    240,    217,    182,
+       131,     81,     41,     11,      1,      0,    255,    254,
+       233,    201,    159,    107,     61,     20,      2,      1,
+         0,    255,    249,    233,    206,    170,    128,     86,
+        50,     23,      7,      1,      0,    255,    250,    238,
+       217,    186,    148,    108,     70,     39,     18,      6,
+         1,      0,    255,    252,    243,    226,    200,    166,
+       128,     90,     56,     30,     13,      4,      1,      0,
+       255,    252,    245,    231,    209,    180,    146,    110,
+        76,     47,     25,     11,      4,      1,      0,    255,
+       253,    248,    237,    219,    194,    163,    128,     93,
+        62,     37,     19,      8,      3,      1,      0,    255,
+       254,    250,    241,    226,    205,    177,    145,    111,
+        79,     51,     30,     15,      6,      2,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table2[ 152 ] = {
+       129,      0,    203,     54,      0,    234,    129,     23,
+         0,    245,    184,     73,     10,      0,    250,    215,
+       129,     41,      5,      0,    252,    232,    173,     86,
+        24,      3,      0,    253,    240,    200,    129,     56,
+        15,      2,      0,    253,    244,    217,    164,     94,
+        38,     10,      1,      0,    253,    245,    226,    189,
+       132,     71,     27,      7,      1,      0,    253,    246,
+       231,    203,    159,    105,     56,     23,      6,      1,
+         0,    255,    248,    235,    213,    179,    133,     85,
+        47,     19,      5,      1,      0,    255,    254,    243,
+       221,    194,    159,    117,     70,     37,     12,      2,
+         1,      0,    255,    254,    248,    234,    208,    171,
+       128,     85,     48,     22,      8,      2,      1,      0,
+       255,    254,    250,    240,    220,    189,    149,    107,
+        67,     36,     16,      6,      2,      1,      0,    255,
+       254,    251,    243,    227,    201,    166,    128,     90,
+        55,     29,     13,      5,      2,      1,      0,    255,
+       254,    252,    246,    234,    213,    183,    147,    109,
+        73,     43,     22,     10,      4,      2,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table3[ 152 ] = {
+       130,      0,    200,     58,      0,    231,    130,     26,
+         0,    244,    184,     76,     12,      0,    249,    214,
+       130,     43,      6,      0,    252,    232,    173,     87,
+        24,      3,      0,    253,    241,    203,    131,     56,
+        14,      2,      0,    254,    246,    221,    167,     94,
+        35,      8,      1,      0,    254,    249,    232,    193,
+       130,     65,     23,      5,      1,      0,    255,    251,
+       239,    211,    162,     99,     45,     15,      4,      1,
+         0,    255,    251,    243,    223,    186,    131,     74,
+        33,     11,      3,      1,      0,    255,    252,    245,
+       230,    202,    158,    105,     57,     24,      8,      2,
+         1,      0,    255,    253,    247,    235,    214,    179,
+       132,     84,     44,     19,      7,      2,      1,      0,
+       255,    254,    250,    240,    223,    196,    159,    112,
+        69,     36,     15,      6,      2,      1,      0,    255,
+       254,    253,    245,    231,    209,    176,    136,     93,
+        55,     27,     11,      3,      2,      1,      0,    255,
+       254,    253,    252,    239,    221,    194,    158,    117,
+        76,     42,     18,      4,      3,      2,      1,      0
+};
+
+const opus_uint8 silk_shell_code_table_offsets[ 17 ] = {
+         0,      0,      2,      5,      9,     14,     20,     27,
+        35,     44,     54,     65,     77,     90,    104,    119,
+       135
+};
+
+const opus_uint8 silk_sign_iCDF[ 42 ] = {
+       254,     49,     67,     77,     82,     93,     99,
+       198,     11,     18,     24,     31,     36,     45,
+       255,     46,     66,     78,     87,     94,    104,
+       208,     14,     21,     32,     42,     51,     66,
+       255,     94,    104,    109,    112,    115,    118,
+       248,     53,     69,     80,     88,     95,    102
+};
diff --git a/drivers/opus/silk/tuning_parameters.h b/drivers/opus/silk/tuning_parameters.h
new file mode 100644
index 0000000000..e1057bbaae
--- /dev/null
+++ b/drivers/opus/silk/tuning_parameters.h
@@ -0,0 +1,171 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TUNING_PARAMETERS_H
+#define SILK_TUNING_PARAMETERS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Decay time for bitreservoir */
+#define BITRESERVOIR_DECAY_TIME_MS                      500
+
+/*******************/
+/* Pitch estimator */
+/*******************/
+
+/* Level of noise floor for whitening filter LPC analysis in pitch analysis */
+#define FIND_PITCH_WHITE_NOISE_FRACTION                 1e-3f
+
+/* Bandwidth expansion for whitening filter in pitch analysis */
+#define FIND_PITCH_BANDWIDTH_EXPANSION                  0.99f
+
+/*********************/
+/* Linear prediction */
+/*********************/
+
+/* LPC analysis regularization */
+#define FIND_LPC_COND_FAC                               1e-5f
+
+/* LTP analysis defines */
+#define FIND_LTP_COND_FAC                               1e-5f
+#define LTP_DAMPING                                     0.05f
+#define LTP_SMOOTHING                                   0.1f
+
+/* LTP quantization settings */
+#define MU_LTP_QUANT_NB                                 0.03f
+#define MU_LTP_QUANT_MB                                 0.025f
+#define MU_LTP_QUANT_WB                                 0.02f
+
+/* Max cumulative LTP gain */
+#define MAX_SUM_LOG_GAIN_DB								250.0f
+
+/***********************/
+/* High pass filtering */
+/***********************/
+
+/* Smoothing parameters for low end of pitch frequency range estimation */
+#define VARIABLE_HP_SMTH_COEF1                          0.1f
+#define VARIABLE_HP_SMTH_COEF2                          0.015f
+#define VARIABLE_HP_MAX_DELTA_FREQ                      0.4f
+
+/* Min and max cut-off frequency values (-3 dB points) */
+#define VARIABLE_HP_MIN_CUTOFF_HZ                       60
+#define VARIABLE_HP_MAX_CUTOFF_HZ                       100
+
+/***********/
+/* Various */
+/***********/
+
+/* VAD threshold */
+#define SPEECH_ACTIVITY_DTX_THRES                       0.05f
+
+/* Speech Activity LBRR enable threshold */
+#define LBRR_SPEECH_ACTIVITY_THRES                      0.3f
+
+/*************************/
+/* Perceptual parameters */
+/*************************/
+
+/* reduction in coding SNR during low speech activity */
+#define BG_SNR_DECR_dB                                  2.0f
+
+/* factor for reducing quantization noise during voiced speech */
+#define HARM_SNR_INCR_dB                                2.0f
+
+/* factor for reducing quantization noise for unvoiced sparse signals */
+#define SPARSE_SNR_INCR_dB                              2.0f
+
+/* threshold for sparseness measure above which to use lower quantization offset during unvoiced */
+#define SPARSENESS_THRESHOLD_QNT_OFFSET                 0.75f
+
+/* warping control */
+#define WARPING_MULTIPLIER                              0.015f
+
+/* fraction added to first autocorrelation value */
+#define SHAPE_WHITE_NOISE_FRACTION                      5e-5f
+
+/* noise shaping filter chirp factor */
+#define BANDWIDTH_EXPANSION                             0.95f
+
+/* difference between chirp factors for analysis and synthesis noise shaping filters at low bitrates */
+#define LOW_RATE_BANDWIDTH_EXPANSION_DELTA              0.01f
+
+/* extra harmonic boosting (signal shaping) at low bitrates */
+#define LOW_RATE_HARMONIC_BOOST                         0.1f
+
+/* extra harmonic boosting (signal shaping) for noisy input signals */
+#define LOW_INPUT_QUALITY_HARMONIC_BOOST                0.1f
+
+/* harmonic noise shaping */
+#define HARMONIC_SHAPING                                0.3f
+
+/* extra harmonic noise shaping for high bitrates or noisy input */
+#define HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING       0.2f
+
+/* parameter for shaping noise towards higher frequencies */
+#define HP_NOISE_COEF                                   0.25f
+
+/* parameter for shaping noise even more towards higher frequencies during voiced speech */
+#define HARM_HP_NOISE_COEF                              0.35f
+
+/* parameter for applying a high-pass tilt to the input signal */
+#define INPUT_TILT                                      0.05f
+
+/* parameter for extra high-pass tilt to the input signal at high rates */
+#define HIGH_RATE_INPUT_TILT                            0.1f
+
+/* parameter for reducing noise at the very low frequencies */
+#define LOW_FREQ_SHAPING                                4.0f
+
+/* less reduction of noise at the very low frequencies for signals with low SNR at low frequencies */
+#define LOW_QUALITY_LOW_FREQ_SHAPING_DECR               0.5f
+
+/* subframe smoothing coefficient for HarmBoost, HarmShapeGain, Tilt (lower -> more smoothing) */
+#define SUBFR_SMTH_COEF                                 0.4f
+
+/* parameters defining the R/D tradeoff in the residual quantizer */
+#define LAMBDA_OFFSET                                   1.2f
+#define LAMBDA_SPEECH_ACT                               -0.2f
+#define LAMBDA_DELAYED_DECISIONS                        -0.05f
+#define LAMBDA_INPUT_QUALITY                            -0.1f
+#define LAMBDA_CODING_QUALITY                           -0.2f
+#define LAMBDA_QUANT_OFFSET                             0.8f
+
+/* Compensation in bitrate calculations for 10 ms modes */
+#define REDUCE_BITRATE_10_MS_BPS                        2200
+
+/* Maximum time before allowing a bandwidth transition */
+#define MAX_BANDWIDTH_SWITCH_DELAY_MS                   5000
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_TUNING_PARAMETERS_H */
diff --git a/drivers/opus/silk/typedef.h b/drivers/opus/silk/typedef.h
new file mode 100644
index 0000000000..ca2361bc82
--- /dev/null
+++ b/drivers/opus/silk/typedef.h
@@ -0,0 +1,78 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_TYPEDEF_H
+#define SILK_TYPEDEF_H
+
+#include "opus_types.h"
+#include "opus_defines.h"
+
+#ifndef OPUS_FIXED_POINT
+# include <float.h>
+# define silk_float      float
+# define silk_float_MAX  FLT_MAX
+#endif
+
+#define silk_int64_MAX   ((opus_int64)0x7FFFFFFFFFFFFFFFLL)   /*  2^63 - 1 */
+#define silk_int64_MIN   ((opus_int64)0x8000000000000000LL)   /* -2^63 */
+#define silk_int32_MAX   0x7FFFFFFF                           /*  2^31 - 1 =  2147483647 */
+#define silk_int32_MIN   ((opus_int32)0x80000000)             /* -2^31     = -2147483648 */
+#define silk_int16_MAX   0x7FFF                               /*  2^15 - 1 =  32767 */
+#define silk_int16_MIN   ((opus_int16)0x8000)                 /* -2^15     = -32768 */
+#define silk_int8_MAX    0x7F                                 /*  2^7 - 1  =  127 */
+#define silk_int8_MIN    ((opus_int8)0x80)                    /* -2^7      = -128 */
+#define silk_uint8_MAX   0xFF                                 /*  2^8 - 1 = 255 */
+
+#define silk_TRUE        1
+#define silk_FALSE       0
+
+/* assertions */
+#if (defined _WIN32 && !defined _WINCE && !defined(__GNUC__) && !defined(NO_ASSERTS))
+# ifndef silk_assert
+#  include <crtdbg.h>      /* ASSERTE() */
+#  define silk_assert(COND)   _ASSERTE(COND)
+# endif
+#else
+# ifdef ENABLE_ASSERTIONS
+#  include <stdio.h>
+#  include <stdlib.h>
+#define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__);
+#ifdef __GNUC__
+__attribute__((noreturn))
+#endif
+static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line)
+{
+   fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
+   abort();
+}
+#  define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}}
+# else
+#  define silk_assert(COND)
+# endif
+#endif
+
+#endif /* SILK_TYPEDEF_H */
diff --git a/drivers/opus/stream.c b/drivers/opus/stream.c
new file mode 100644
index 0000000000..17293f2bca
--- /dev/null
+++ b/drivers/opus/stream.c
@@ -0,0 +1,366 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012           *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+ function: stdio-based convenience library for opening/seeking/decoding
+ last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
+
+ ********************************************************************/
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#if defined(_WIN32)
+# include <io.h>
+#endif
+
+typedef struct OpusMemStream OpusMemStream;
+
+#define OP_MEM_SIZE_MAX (~(size_t)0>>1)
+#define OP_MEM_DIFF_MAX ((ptrdiff_t)OP_MEM_SIZE_MAX)
+
+/*The context information needed to read from a block of memory as if it were a
+   file.*/
+struct OpusMemStream{
+  /*The block of memory to read from.*/
+  const unsigned char *data;
+  /*The total size of the block.
+    This must be at most OP_MEM_SIZE_MAX to prevent signed overflow while
+     seeking.*/
+  ptrdiff_t            size;
+  /*The current file position.
+    This is allowed to be set arbitrarily greater than size (i.e., past the end
+     of the block, though we will not read data past the end of the block), but
+     is not allowed to be negative (i.e., before the beginning of the block).*/
+  ptrdiff_t            pos;
+};
+
+static int op_fread(void *_stream,unsigned char *_ptr,int _buf_size){
+  FILE   *stream;
+  size_t  ret;
+  /*Check for empty read.*/
+  if(_buf_size<=0)return 0;
+  stream=(FILE *)_stream;
+  ret=fread(_ptr,1,_buf_size,stream);
+  OP_ASSERT(ret<=(size_t)_buf_size);
+  /*If ret==0 and !feof(stream), there was a read error.*/
+  return ret>0||feof(stream)?(int)ret:OP_EREAD;
+}
+
+static int op_fseek(void *_stream,opus_int64 _offset,int _whence){
+#if defined(_WIN32)
+  /*_fseeki64() is not exposed until MSCVCRT80.
+    This is the default starting with MSVC 2005 (_MSC_VER>=1400), but we want
+     to allow linking against older MSVCRT versions for compatibility back to
+     XP without installing extra runtime libraries.
+    i686-pc-mingw32 does not have fseeko() and requires
+     __MSVCRT_VERSION__>=0x800 for _fseeki64(), which screws up linking with
+     other libraries (that don't use MSVCRT80 from MSVC 2005 by default).
+    i686-w64-mingw32 does have fseeko() and respects _FILE_OFFSET_BITS, but I
+     don't know how to detect that at compile time.
+    We could just use fseeko64() (which is available in both), but its
+     implemented using fgetpos()/fsetpos() just like this code, except without
+     the overflow checking, so we prefer our version.*/
+  opus_int64 pos;
+  /*We don't use fpos_t directly because it might be a struct if __STDC__ is
+     non-zero or _INTEGRAL_MAX_BITS < 64.
+    I'm not certain when the latter is true, but someone could in theory set
+     the former.
+    Either way, it should be binary compatible with a normal 64-bit int (this
+     assumption is not portable, but I believe it is true for MSVCRT).*/
+  OP_ASSERT(sizeof(pos)==sizeof(fpos_t));
+  /*Translate the seek to an absolute one.*/
+  if(_whence==SEEK_CUR){
+    int ret;
+    ret=fgetpos((FILE *)_stream,(fpos_t *)&pos);
+    if(ret)return ret;
+  }
+  else if(_whence==SEEK_END)pos=_filelengthi64(_fileno((FILE *)_stream));
+  else if(_whence==SEEK_SET)pos=0;
+  else return -1;
+  /*Check for errors or overflow.*/
+  if(pos<0||_offset<-pos||_offset>OP_INT64_MAX-pos)return -1;
+  pos+=_offset;
+  return fsetpos((FILE *)_stream,(fpos_t *)&pos);
+#else
+  /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer
+     it except on Windows.*/
+  return fseeko((FILE *)_stream,(off_t)_offset,_whence);
+#endif
+}
+
+static opus_int64 op_ftell(void *_stream){
+#if defined(_WIN32)
+  /*_ftelli64() is not exposed until MSCVCRT80, and ftello()/ftello64() have
+     the same problems as fseeko()/fseeko64() in MingW.
+    See above for a more detailed explanation.*/
+  opus_int64 pos;
+  OP_ASSERT(sizeof(pos)==sizeof(fpos_t));
+  return fgetpos((FILE *)_stream,(fpos_t *)&pos)?-1:pos;
+#else
+  /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer
+     it except on Windows.*/
+  return ftello((FILE *)_stream);
+#endif
+}
+
+static const OpusFileCallbacks OP_FILE_CALLBACKS={
+  op_fread,
+  op_fseek,
+  op_ftell,
+  (op_close_func)fclose
+};
+
+#if defined(_WIN32)
+# include <stddef.h>
+# include <errno.h>
+
+/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API,
+   so if we just pass the path to fopen(), then there'd be no way for a user
+   of our API to open a Unicode filename.
+  Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API.
+  This makes this API more consistent with platforms where the character set
+   used by fopen is the same as used on disk, which is generally UTF-8, and
+   with our metadata API, which always uses UTF-8.*/
+static wchar_t *op_utf8_to_utf16(const char *_src){
+  wchar_t *dst;
+  size_t   len;
+  len=strlen(_src);
+  /*Worst-case output is 1 wide character per 1 input character.*/
+  dst=(wchar_t *)_ogg_malloc(sizeof(*dst)*(len+1));
+  if(dst!=NULL){
+    size_t si;
+    size_t di;
+    for(di=si=0;si<len;si++){
+      int c0;
+      c0=(unsigned char)_src[si];
+      if(!(c0&0x80)){
+        /*Start byte says this is a 1-byte sequence.*/
+        dst[di++]=(wchar_t)c0;
+        continue;
+      }
+      else{
+        int c1;
+        /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/
+        c1=(unsigned char)_src[si+1];
+        if((c1&0xC0)==0x80){
+          /*Found at least one continuation byte.*/
+          if((c0&0xE0)==0xC0){
+            wchar_t w;
+            /*Start byte says this is a 2-byte sequence.*/
+            w=(c0&0x1F)<<6|c1&0x3F;
+            if(w>=0x80U){
+              /*This is a 2-byte sequence that is not overlong.*/
+              dst[di++]=w;
+              si++;
+              continue;
+            }
+          }
+          else{
+            int c2;
+            /*This is safe, because c1 was not 0 and _src is NUL-terminated.*/
+            c2=(unsigned char)_src[si+2];
+            if((c2&0xC0)==0x80){
+              /*Found at least two continuation bytes.*/
+              if((c0&0xF0)==0xE0){
+                wchar_t w;
+                /*Start byte says this is a 3-byte sequence.*/
+                w=(c0&0xF)<<12|(c1&0x3F)<<6|c2&0x3F;
+                if(w>=0x800U&&(w<0xD800||w>=0xE000)&&w<0xFFFE){
+                  /*This is a 3-byte sequence that is not overlong, not a
+                     UTF-16 surrogate pair value, and not a 'not a character'
+                     value.*/
+                  dst[di++]=w;
+                  si+=2;
+                  continue;
+                }
+              }
+              else{
+                int c3;
+                /*This is safe, because c2 was not 0 and _src is
+                   NUL-terminated.*/
+                c3=(unsigned char)_src[si+3];
+                if((c3&0xC0)==0x80){
+                  /*Found at least three continuation bytes.*/
+                  if((c0&0xF8)==0xF0){
+                    opus_uint32 w;
+                    /*Start byte says this is a 4-byte sequence.*/
+                    w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6&(c3&0x3F);
+                    if(w>=0x10000U&&w<0x110000U){
+                      /*This is a 4-byte sequence that is not overlong and not
+                         greater than the largest valid Unicode code point.
+                        Convert it to a surrogate pair.*/
+                      w-=0x10000;
+                      dst[di++]=(wchar_t)(0xD800+(w>>10));
+                      dst[di++]=(wchar_t)(0xDC00+(w&0x3FF));
+                      si+=3;
+                      continue;
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      /*If we got here, we encountered an illegal UTF-8 sequence.*/
+      _ogg_free(dst);
+      return NULL;
+    }
+    OP_ASSERT(di<=len);
+    dst[di]='\0';
+  }
+  return dst;
+}
+
+#endif
+
+void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){
+  FILE *fp;
+#if !defined(_WIN32)
+  fp=fopen(_path,_mode);
+#else
+  fp=NULL;
+  if(_path==NULL||_mode==NULL)errno=EINVAL;
+  else{
+    wchar_t *wpath;
+    wchar_t *wmode;
+    wpath=op_utf8_to_utf16(_path);
+    wmode=op_utf8_to_utf16(_mode);
+    if(wmode==NULL)errno=EINVAL;
+    else if(wpath==NULL)errno=ENOENT;
+    else fp=_wfopen(wpath,wmode);
+    _ogg_free(wmode);
+    _ogg_free(wpath);
+  }
+#endif
+  if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+  return fp;
+}
+
+void *op_fdopen(OpusFileCallbacks *_cb,int _fd,const char *_mode){
+  FILE *fp;
+  fp=fdopen(_fd,_mode);
+  if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+  return fp;
+}
+
+void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode,
+ void *_stream){
+  FILE *fp;
+#if !defined(_WIN32)
+  fp=freopen(_path,_mode,(FILE *)_stream);
+#else
+  fp=NULL;
+  if(_path==NULL||_mode==NULL)errno=EINVAL;
+  else{
+    wchar_t *wpath;
+    wchar_t *wmode;
+    wpath=op_utf8_to_utf16(_path);
+    wmode=op_utf8_to_utf16(_mode);
+    if(wmode==NULL)errno=EINVAL;
+    else if(wpath==NULL)errno=ENOENT;
+    else fp=_wfreopen(wpath,wmode,(FILE *)_stream);
+    _ogg_free(wmode);
+    _ogg_free(wpath);
+  }
+#endif
+  if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS;
+  return fp;
+}
+
+static int op_mem_read(void *_stream,unsigned char *_ptr,int _buf_size){
+  OpusMemStream *stream;
+  ptrdiff_t      size;
+  ptrdiff_t      pos;
+  stream=(OpusMemStream *)_stream;
+  /*Check for empty read.*/
+  if(_buf_size<=0)return 0;
+  size=stream->size;
+  pos=stream->pos;
+  /*Check for EOF.*/
+  if(pos>=size)return 0;
+  /*Check for a short read.*/
+  _buf_size=(int)OP_MIN(size-pos,_buf_size);
+  memcpy(_ptr,stream->data+pos,_buf_size);
+  pos+=_buf_size;
+  stream->pos=pos;
+  return _buf_size;
+}
+
+static int op_mem_seek(void *_stream,opus_int64 _offset,int _whence){
+  OpusMemStream *stream;
+  ptrdiff_t      pos;
+  stream=(OpusMemStream *)_stream;
+  pos=stream->pos;
+  OP_ASSERT(pos>=0);
+  switch(_whence){
+    case SEEK_SET:{
+      /*Check for overflow:*/
+      if(_offset<0||_offset>OP_MEM_DIFF_MAX)return -1;
+      pos=(ptrdiff_t)_offset;
+    }break;
+    case SEEK_CUR:{
+      /*Check for overflow:*/
+      if(_offset<-pos||_offset>OP_MEM_DIFF_MAX-pos)return -1;
+      pos=(ptrdiff_t)(pos+_offset);
+    }break;
+    case SEEK_END:{
+      ptrdiff_t size;
+      size=stream->size;
+      OP_ASSERT(size>=0);
+      /*Check for overflow:*/
+      if(_offset>size||_offset<size-OP_MEM_DIFF_MAX)return -1;
+      pos=(ptrdiff_t)(size-_offset);
+    }break;
+    default:return -1;
+  }
+  stream->pos=pos;
+  return 0;
+}
+
+static opus_int64 op_mem_tell(void *_stream){
+  OpusMemStream *stream;
+  stream=(OpusMemStream *)_stream;
+  return (ogg_int64_t)stream->pos;
+}
+
+static int op_mem_close(void *_stream){
+  _ogg_free(_stream);
+  return 0;
+}
+
+static const OpusFileCallbacks OP_MEM_CALLBACKS={
+  op_mem_read,
+  op_mem_seek,
+  op_mem_tell,
+  op_mem_close
+};
+
+void *op_mem_stream_create(OpusFileCallbacks *_cb,
+ const unsigned char *_data,size_t _size){
+  OpusMemStream *stream;
+  if(_size>OP_MEM_SIZE_MAX)return NULL;
+  stream=(OpusMemStream *)_ogg_malloc(sizeof(*stream));
+  if(stream!=NULL){
+    *_cb=*&OP_MEM_CALLBACKS;
+    stream->data=_data;
+    stream->size=_size;
+    stream->pos=0;
+  }
+  return stream;
+}
diff --git a/drivers/opus/tansig_table.h b/drivers/opus/tansig_table.h
new file mode 100644
index 0000000000..c76f844a72
--- /dev/null
+++ b/drivers/opus/tansig_table.h
@@ -0,0 +1,45 @@
+/* This file is auto-generated by gen_tables */
+
+static const float tansig_table[201] = {
+0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
+0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
+0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
+0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
+0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
+0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
+0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
+0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
+0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
+0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
+0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
+0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
+0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
+0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
+0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
+0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
+0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
+0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
+0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
+0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
+0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
+0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
+0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
+0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
+0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
+0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
+0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
+0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
+0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
+0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
+0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
+0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
+0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
+0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
+0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
+0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
+1.000000f,
+};
diff --git a/drivers/opus/wincerts.c b/drivers/opus/wincerts.c
new file mode 100644
index 0000000000..568a085e43
--- /dev/null
+++ b/drivers/opus/wincerts.c
@@ -0,0 +1,171 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2013                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+
+/*This should really be part of OpenSSL, but there's been a patch [1] sitting
+   in their bugtracker for over two years that implements this, without any
+   action, so I'm giving up and re-implementing it locally.
+
+  [1] <http://rt.openssl.org/Ticket/Display.html?id=2158>*/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "internal.h"
+#if defined(OP_ENABLE_HTTP)&&defined(_WIN32)
+/*You must include windows.h before wincrypt.h and x509.h.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+/*You must include wincrypt.h before x509.h, too, or X509_NAME doesn't get
+   defined properly.*/
+# include <wincrypt.h>
+# include <openssl/ssl.h>
+# include <openssl/err.h>
+# include <openssl/x509.h>
+
+static int op_capi_new(X509_LOOKUP *_lu){
+  HCERTSTORE h_store;
+  h_store=CertOpenStore(CERT_STORE_PROV_SYSTEM_A,0,0,
+   CERT_STORE_OPEN_EXISTING_FLAG|CERT_STORE_READONLY_FLAG|
+   CERT_SYSTEM_STORE_CURRENT_USER|CERT_STORE_SHARE_CONTEXT_FLAG,"ROOT");
+  if(h_store!=NULL){
+    _lu->method_data=(char *)h_store;
+    return 1;
+  }
+  return 0;
+}
+
+static void op_capi_free(X509_LOOKUP *_lu){
+  HCERTSTORE h_store;
+  h_store=(HCERTSTORE)_lu->method_data;
+# if defined(OP_ENABLE_ASSERTIONS)
+  OP_ALWAYS_TRUE(CertCloseStore(h_store,CERT_CLOSE_STORE_CHECK_FLAG));
+# else
+  CertCloseStore(h_store,0);
+# endif
+}
+
+static int op_capi_retrieve_by_subject(X509_LOOKUP *_lu,int _type,
+ X509_NAME *_name,X509_OBJECT *_ret){
+  X509_OBJECT *obj;
+  CRYPTO_w_lock(CRYPTO_LOCK_X509_STORE);
+  obj=X509_OBJECT_retrieve_by_subject(_lu->store_ctx->objs,_type,_name);
+  CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE);
+  if(obj!=NULL){
+    _ret->type=obj->type;
+    memcpy(&_ret->data,&obj->data,sizeof(_ret->data));
+    return 1;
+  }
+  return 0;
+}
+
+static int op_capi_get_by_subject(X509_LOOKUP *_lu,int _type,X509_NAME *_name,
+ X509_OBJECT *_ret){
+  HCERTSTORE h_store;
+  if(_name==NULL)return 0;
+  if(_name->bytes==NULL||_name->bytes->length<=0||_name->modified){
+    if(i2d_X509_NAME(_name,NULL)<0)return 0;
+    OP_ASSERT(_name->bytes->length>0);
+  }
+  h_store=(HCERTSTORE)_lu->method_data;
+  switch(_type){
+    case X509_LU_X509:{
+      CERT_NAME_BLOB  find_para;
+      PCCERT_CONTEXT  cert;
+      X509           *x;
+      int             ret;
+      /*Although X509_NAME contains a canon_enc field, that "canonical" [1]
+         encoding was just made up by OpenSSL.
+        It doesn't correspond to any actual standard, and since it drops the
+         initial sequence header, won't be recognized by the Crypto API.
+        The assumption here is that CertFindCertificateInStore() will allow any
+         appropriate variations in the encoding when it does its comparison.
+        This is, however, emphatically not true under Wine, which just compares
+         the encodings with memcmp().
+        Most of the time things work anyway, though, and there isn't really
+         anything we can do to make the situation better.
+
+        [1] A "canonical form" is defined as the one where, if you locked 10
+         mathematicians in a room and asked them to come up with a
+         representation for something, it's the answer that 9 of them would
+         give you back.
+        I don't think OpenSSL's encoding qualifies.*/
+      find_para.cbData=_name->bytes->length;
+      find_para.pbData=(unsigned char *)_name->bytes->data;
+      cert=CertFindCertificateInStore(h_store,X509_ASN_ENCODING,0,
+       CERT_FIND_SUBJECT_NAME,&find_para,NULL);
+      if(cert==NULL)return 0;
+      x=d2i_X509(NULL,(const unsigned char **)&cert->pbCertEncoded,
+       cert->cbCertEncoded);
+      CertFreeCertificateContext(cert);
+      if(x==NULL)return 0;
+      ret=X509_STORE_add_cert(_lu->store_ctx,x);
+      X509_free(x);
+      if(ret)return op_capi_retrieve_by_subject(_lu,_type,_name,_ret);
+    }break;
+    case X509_LU_CRL:{
+      CERT_INFO      cert_info;
+      CERT_CONTEXT   find_para;
+      PCCRL_CONTEXT  crl;
+      X509_CRL      *x;
+      int            ret;
+      ret=op_capi_retrieve_by_subject(_lu,_type,_name,_ret);
+      if(ret>0)return ret;
+      memset(&cert_info,0,sizeof(cert_info));
+      cert_info.Issuer.cbData=_name->bytes->length;
+      cert_info.Issuer.pbData=(unsigned char *)_name->bytes->data;
+      memset(&find_para,0,sizeof(find_para));
+      find_para.pCertInfo=&cert_info;
+      crl=CertFindCRLInStore(h_store,0,0,CRL_FIND_ISSUED_BY,&find_para,NULL);
+      if(crl==NULL)return 0;
+      x=d2i_X509_CRL(NULL,(const unsigned char **)&crl->pbCrlEncoded,
+       crl->cbCrlEncoded);
+      CertFreeCRLContext(crl);
+      if(x==NULL)return 0;
+      ret=X509_STORE_add_crl(_lu->store_ctx,x);
+      X509_CRL_free(x);
+      if(ret)return op_capi_retrieve_by_subject(_lu,_type,_name,_ret);
+    }break;
+  }
+  return 0;
+}
+
+/*This is not const because OpenSSL doesn't allow it, even though it won't
+   write to it.*/
+static X509_LOOKUP_METHOD X509_LOOKUP_CAPI={
+  "Load Crypto API store into cache",
+  op_capi_new,
+  op_capi_free,
+  NULL,
+  NULL,
+  NULL,
+  op_capi_get_by_subject,
+  NULL,
+  NULL,
+  NULL
+};
+
+int SSL_CTX_set_default_verify_paths_win32(SSL_CTX *_ssl_ctx){
+  X509_STORE  *store;
+  X509_LOOKUP *lu;
+  /*We intentionally do not add the normal default paths, as they are usually
+     wrong, and are just asking to be used as an exploit vector.*/
+  store=SSL_CTX_get_cert_store(_ssl_ctx);
+  OP_ASSERT(store!=NULL);
+  lu=X509_STORE_add_lookup(store,&X509_LOOKUP_CAPI);
+  if(lu==NULL)return 0;
+  ERR_clear_error();
+  return 1;
+}
+
+#endif
diff --git a/drivers/opus/winerrno.h b/drivers/opus/winerrno.h
new file mode 100644
index 0000000000..32a90b4ee1
--- /dev/null
+++ b/drivers/opus/winerrno.h
@@ -0,0 +1,90 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************/
+#if !defined(_opusfile_winerrno_h)
+# define _opusfile_winerrno_h (1)
+
+# include <errno.h>
+# include <winerror.h>
+
+/*These conflict with the MSVC errno.h definitions, but we don't need to use
+   the original ones in any file that deals with sockets.
+  We could map the WSA errors to the errno.h ones (most of which are only
+   available on sufficiently new versions of MSVC), but they aren't ordered the
+   same, and given how rarely we actually look at the values, I don't think
+   it's worth a lookup table.*/
+# undef EWOULDBLOCK
+# undef EINPROGRESS
+# undef EALREADY
+# undef ENOTSOCK
+# undef EDESTADDRREQ
+# undef EMSGSIZE
+# undef EPROTOTYPE
+# undef ENOPROTOOPT
+# undef EPROTONOSUPPORT
+# undef EOPNOTSUPP
+# undef EAFNOSUPPORT
+# undef EADDRINUSE
+# undef EADDRNOTAVAIL
+# undef ENETDOWN
+# undef ENETUNREACH
+# undef ENETRESET
+# undef ECONNABORTED
+# undef ECONNRESET
+# undef ENOBUFS
+# undef EISCONN
+# undef ENOTCONN
+# undef ETIMEDOUT
+# undef ECONNREFUSED
+# undef ELOOP
+# undef ENAMETOOLONG
+# undef EHOSTUNREACH
+# undef ENOTEMPTY
+
+# define EWOULDBLOCK     (WSAEWOULDBLOCK-WSABASEERR)
+# define EINPROGRESS     (WSAEINPROGRESS-WSABASEERR)
+# define EALREADY        (WSAEALREADY-WSABASEERR)
+# define ENOTSOCK        (WSAENOTSOCK-WSABASEERR)
+# define EDESTADDRREQ    (WSAEDESTADDRREQ-WSABASEERR)
+# define EMSGSIZE        (WSAEMSGSIZE-WSABASEERR)
+# define EPROTOTYPE      (WSAEPROTOTYPE-WSABASEERR)
+# define ENOPROTOOPT     (WSAENOPROTOOPT-WSABASEERR)
+# define EPROTONOSUPPORT (WSAEPROTONOSUPPORT-WSABASEERR)
+# define ESOCKTNOSUPPORT (WSAESOCKTNOSUPPORT-WSABASEERR)
+# define EOPNOTSUPP      (WSAEOPNOTSUPP-WSABASEERR)
+# define EPFNOSUPPORT    (WSAEPFNOSUPPORT-WSABASEERR)
+# define EAFNOSUPPORT    (WSAEAFNOSUPPORT-WSABASEERR)
+# define EADDRINUSE      (WSAEADDRINUSE-WSABASEERR)
+# define EADDRNOTAVAIL   (WSAEADDRNOTAVAIL-WSABASEERR)
+# define ENETDOWN        (WSAENETDOWN-WSABASEERR)
+# define ENETUNREACH     (WSAENETUNREACH-WSABASEERR)
+# define ENETRESET       (WSAENETRESET-WSABASEERR)
+# define ECONNABORTED    (WSAECONNABORTED-WSABASEERR)
+# define ECONNRESET      (WSAECONNRESET-WSABASEERR)
+# define ENOBUFS         (WSAENOBUFS-WSABASEERR)
+# define EISCONN         (WSAEISCONN-WSABASEERR)
+# define ENOTCONN        (WSAENOTCONN-WSABASEERR)
+# define ESHUTDOWN       (WSAESHUTDOWN-WSABASEERR)
+# define ETOOMANYREFS    (WSAETOOMANYREFS-WSABASEERR)
+# define ETIMEDOUT       (WSAETIMEDOUT-WSABASEERR)
+# define ECONNREFUSED    (WSAECONNREFUSED-WSABASEERR)
+# define ELOOP           (WSAELOOP-WSABASEERR)
+# define ENAMETOOLONG    (WSAENAMETOOLONG-WSABASEERR)
+# define EHOSTDOWN       (WSAEHOSTDOWN-WSABASEERR)
+# define EHOSTUNREACH    (WSAEHOSTUNREACH-WSABASEERR)
+# define ENOTEMPTY       (WSAENOTEMPTY-WSABASEERR)
+# define EPROCLIM        (WSAEPROCLIM-WSABASEERR)
+# define EUSERS          (WSAEUSERS-WSABASEERR)
+# define EDQUOT          (WSAEDQUOT-WSABASEERR)
+# define ESTALE          (WSAESTALE-WSABASEERR)
+# define EREMOTE         (WSAEREMOTE-WSABASEERR)
+
+#endif
diff --git a/drivers/pulseaudio/audio_driver_pulseaudio.cpp b/drivers/pulseaudio/audio_driver_pulseaudio.cpp
index dfe9ddc55f..4cda141f92 100644
--- a/drivers/pulseaudio/audio_driver_pulseaudio.cpp
+++ b/drivers/pulseaudio/audio_driver_pulseaudio.cpp
@@ -82,6 +82,17 @@ Error AudioDriverPulseAudio::init() {
 	return OK;
 }
 
+float AudioDriverPulseAudio::get_latency() {
+
+	if (latency==0)	{ //only do this once since it's approximate anyway
+		int error_code;
+		pa_usec_t palat = pa_simple_get_latency( pulse,&error_code);
+		latency=double(palat)/1000000.0;
+	}
+
+	return latency;
+}
+
 void AudioDriverPulseAudio::thread_func(void* p_udata) {
 
     AudioDriverPulseAudio* ad = (AudioDriverPulseAudio*)p_udata;
@@ -121,6 +132,7 @@ void AudioDriverPulseAudio::thread_func(void* p_udata) {
             ad->exit_thread = true;
             break;
         }
+
     }
 
     ad->thread_exited = true;
@@ -185,6 +197,7 @@ AudioDriverPulseAudio::AudioDriverPulseAudio() {
 	mutex = NULL;
     thread = NULL;
     pulse = NULL;
+    latency=0;
 }
 
 AudioDriverPulseAudio::~AudioDriverPulseAudio() {
diff --git a/drivers/pulseaudio/audio_driver_pulseaudio.h b/drivers/pulseaudio/audio_driver_pulseaudio.h
index e82e0c24be..e7c8bcce36 100644
--- a/drivers/pulseaudio/audio_driver_pulseaudio.h
+++ b/drivers/pulseaudio/audio_driver_pulseaudio.h
@@ -58,6 +58,8 @@ class AudioDriverPulseAudio : public AudioDriverSW {
 	mutable bool exit_thread;
 	bool pcm_open;
 
+	float latency;
+
 public:
 
 	const char* get_name() const {
@@ -72,6 +74,9 @@ public:
 	virtual void unlock();
 	virtual void finish();
 
+	virtual float get_latency();
+
+
     AudioDriverPulseAudio();
     ~AudioDriverPulseAudio();
 };
diff --git a/drivers/register_driver_types.cpp b/drivers/register_driver_types.cpp
index 01f6a8b5b0..2647d23011 100644
--- a/drivers/register_driver_types.cpp
+++ b/drivers/register_driver_types.cpp
@@ -37,13 +37,19 @@
 #include "vorbis/audio_stream_ogg_vorbis.h"
 #endif
 
+#ifdef OPUS_ENABLED
+#include "opus/audio_stream_opus.h"
+#endif
 
 #ifdef SPEEX_ENABLED
 #include "speex/audio_stream_speex.h"
 #endif
 
 #ifdef THEORA_ENABLED
-//#include "theora/video_stream_theora.h"
+#include "theora/video_stream_theora.h"
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
 #include "theoraplayer/video_stream_theoraplayer.h"
 #endif
 
@@ -85,12 +91,19 @@ static ResourceFormatLoaderAudioStreamOGG *vorbis_stream_loader=NULL;
 static ResourceFormatLoaderAudioStreamOGGVorbis *vorbis_stream_loader=NULL;
 #endif
 
+#ifdef OPUS_ENABLED
+static ResourceFormatLoaderAudioStreamOpus *opus_stream_loader=NULL;
+#endif
+
 #ifdef SPEEX_ENABLED
 static ResourceFormatLoaderAudioStreamSpeex *speex_stream_loader=NULL;
 #endif
 
 #ifdef THEORA_ENABLED
-//static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL;
+static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL;
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
 static ResourceFormatLoaderVideoStreamTheoraplayer* theoraplayer_stream_loader = NULL;
 #endif
 
@@ -169,6 +182,11 @@ void register_driver_types() {
 	ObjectTypeDB::register_type<AudioStreamOGGVorbis>();
 #endif
 
+#ifdef OPUS_ENABLED
+	opus_stream_loader=memnew( ResourceFormatLoaderAudioStreamOpus );
+	ResourceLoader::add_resource_format_loader( opus_stream_loader );
+	ObjectTypeDB::register_type<AudioStreamOpus>();
+#endif
 
 #ifdef DDS_ENABLED
 	resource_loader_dds = memnew( ResourceFormatDDS );
@@ -205,9 +223,12 @@ void register_driver_types() {
 #endif
 
 #ifdef THEORA_ENABLED
-	//theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora );
-	//ResourceLoader::add_resource_format_loader(theora_stream_loader);
-	//ObjectTypeDB::register_type<VideoStreamTheora>();
+	theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora );
+	ResourceLoader::add_resource_format_loader(theora_stream_loader);
+	ObjectTypeDB::register_type<VideoStreamTheora>();
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
 	theoraplayer_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheoraplayer );
 	ResourceLoader::add_resource_format_loader(theoraplayer_stream_loader);
 	ObjectTypeDB::register_type<VideoStreamTheoraplayer>();
@@ -239,12 +260,19 @@ void unregister_driver_types() {
 	memdelete( vorbis_stream_loader );
 #endif
 
+#ifdef OPUS_ENABLED
+	memdelete( opus_stream_loader );
+#endif
+
 #ifdef SPEEX_ENABLED
 	memdelete( speex_stream_loader );
 #endif
 
 #ifdef THEORA_ENABLED
-	//memdelete (theora_stream_loader);
+	memdelete (theora_stream_loader);
+#endif
+
+#ifdef THEORAPLAYER_ENABLED
 	memdelete (theoraplayer_stream_loader);
 #endif
 
diff --git a/drivers/rtaudio/RtAudio.cpp b/drivers/rtaudio/RtAudio.cpp
index 8876f72e21..72ca836907 100644
--- a/drivers/rtaudio/RtAudio.cpp
+++ b/drivers/rtaudio/RtAudio.cpp
@@ -1,10234 +1,10234 @@
-#ifdef RTAUDIO_ENABLED
-/************************************************************************/
-/*! \class RtAudio
-    \brief Realtime audio i/o C++ classes.
-
-    RtAudio provides a common API (Application Programming Interface)
-    for realtime audio input/output across Linux (native ALSA, Jack,
-    and OSS), Macintosh OS X (CoreAudio and Jack), and Windows
-    (DirectSound, ASIO and WASAPI) operating systems.
-
-    RtAudio WWW site: http://www.music.mcgill.ca/~gary/rtaudio/
-
-    RtAudio: realtime audio i/o C++ classes
-    Copyright (c) 2001-2014 Gary P. Scavone
-
-    Permission is hereby granted, free of charge, to any person
-    obtaining a copy of this software and associated documentation files
-    (the "Software"), to deal in the Software without restriction,
-    including without limitation the rights to use, copy, modify, merge,
-    publish, distribute, sublicense, and/or sell copies of the Software,
-    and to permit persons to whom the Software is furnished to do so,
-    subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be
-    included in all copies or substantial portions of the Software.
-
-    Any person wishing to distribute modifications to the Software is
-    asked to send the modifications to the original developer so that
-    they can be incorporated into the canonical version.  This is,
-    however, not a binding provision of this license.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
-    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-/************************************************************************/
-
-// RtAudio: Version 4.1.1
-
-#include "RtAudio.h"
-#include <iostream>
-#include <cstdlib>
-#include <cstring>
-#include <climits>
-#include <algorithm>
-
-// Static variable definitions.
-const unsigned int RtApi::MAX_SAMPLE_RATES = 14;
-const unsigned int RtApi::SAMPLE_RATES[] = {
-  4000, 5512, 8000, 9600, 11025, 16000, 22050,
-  32000, 44100, 48000, 88200, 96000, 176400, 192000
-};
-
-#if defined(__WINDOWS_DS__) || defined(__WINDOWS_ASIO__) || defined(__WINDOWS_WASAPI__)
-#ifdef WINRT_ENABLED
-  #define MUTEX_INITIALIZE(A) InitializeCriticalSectionEx(A, 0, 0)
-#else
-  #define MUTEX_INITIALIZE(A) InitializeCriticalSection(A)
-#endif
-  #define MUTEX_DESTROY(A)    DeleteCriticalSection(A)
-  #define MUTEX_LOCK(A)       EnterCriticalSection(A)
-  #define MUTEX_UNLOCK(A)     LeaveCriticalSection(A)
-
-  #include "tchar.h"
-
-  static std::string convertCharPointerToStdString(const char *text)
-  {
-    return std::string(text);
-  }
-
-  static std::string convertCharPointerToStdString(const wchar_t *text)
-  {
-    int length = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
-    std::string s( length-1, '\0' );
-    WideCharToMultiByte(CP_UTF8, 0, text, -1, &s[0], length, NULL, NULL);
-    return s;
-  }
-
-#elif defined(__LINUX_ALSA__) || defined(__LINUX_PULSE__) || defined(__UNIX_JACK__) || defined(__LINUX_OSS__) || defined(__MACOSX_CORE__)
-  // pthread API
-  #define MUTEX_INITIALIZE(A) pthread_mutex_init(A, NULL)
-  #define MUTEX_DESTROY(A)    pthread_mutex_destroy(A)
-  #define MUTEX_LOCK(A)       pthread_mutex_lock(A)
-  #define MUTEX_UNLOCK(A)     pthread_mutex_unlock(A)
-#else
-  #define MUTEX_INITIALIZE(A) abs(*A) // dummy definitions
-  #define MUTEX_DESTROY(A)    abs(*A) // dummy definitions
-#endif
-
-// *************************************************** //
-//
-// RtAudio definitions.
-//
-// *************************************************** //
-
-std::string RtAudio :: getVersion( void ) throw()
-{
-  return RTAUDIO_VERSION;
-}
-
-void RtAudio :: getCompiledApi( std::vector<RtAudio::Api> &apis ) throw()
-{
-  apis.clear();
-
-  // The order here will control the order of RtAudio's API search in
-  // the constructor.
-#if defined(__UNIX_JACK__)
-  apis.push_back( UNIX_JACK );
-#endif
-#if defined(__LINUX_ALSA__)
-  apis.push_back( LINUX_ALSA );
-#endif
-#if defined(__LINUX_PULSE__)
-  apis.push_back( LINUX_PULSE );
-#endif
-#if defined(__LINUX_OSS__)
-  apis.push_back( LINUX_OSS );
-#endif
-#if defined(__WINDOWS_ASIO__)
-  apis.push_back( WINDOWS_ASIO );
-#endif
-#if defined(__WINDOWS_WASAPI__)
-  apis.push_back( WINDOWS_WASAPI );
-#endif
-#if defined(__WINDOWS_DS__)
-  apis.push_back( WINDOWS_DS );
-#endif
-#if defined(__MACOSX_CORE__)
-  apis.push_back( MACOSX_CORE );
-#endif
-#if defined(__RTAUDIO_DUMMY__)
-  apis.push_back( RTAUDIO_DUMMY );
-#endif
-}
-
-void RtAudio :: openRtApi( RtAudio::Api api )
-{
-  if ( rtapi_ )
-    delete rtapi_;
-  rtapi_ = 0;
-
-#if defined(__UNIX_JACK__)
-  if ( api == UNIX_JACK )
-    rtapi_ = new RtApiJack();
-#endif
-#if defined(__LINUX_ALSA__)
-  if ( api == LINUX_ALSA )
-    rtapi_ = new RtApiAlsa();
-#endif
-#if defined(__LINUX_PULSE__)
-  if ( api == LINUX_PULSE )
-    rtapi_ = new RtApiPulse();
-#endif
-#if defined(__LINUX_OSS__)
-  if ( api == LINUX_OSS )
-    rtapi_ = new RtApiOss();
-#endif
-#if defined(__WINDOWS_ASIO__)
-  if ( api == WINDOWS_ASIO )
-    rtapi_ = new RtApiAsio();
-#endif
-#if defined(__WINDOWS_WASAPI__)
-  if ( api == WINDOWS_WASAPI )
-    rtapi_ = new RtApiWasapi();
-#endif
-#if defined(__WINDOWS_DS__)
-  if ( api == WINDOWS_DS )
-    rtapi_ = new RtApiDs();
-#endif
-#if defined(__MACOSX_CORE__)
-  if ( api == MACOSX_CORE )
-    rtapi_ = new RtApiCore();
-#endif
-#if defined(__RTAUDIO_DUMMY__)
-  if ( api == RTAUDIO_DUMMY )
-    rtapi_ = new RtApiDummy();
-#endif
-}
-
-RtAudio :: RtAudio( RtAudio::Api api )
-{
-  rtapi_ = 0;
-
-  if ( api != UNSPECIFIED ) {
-    // Attempt to open the specified API.
-    openRtApi( api );
-    if ( rtapi_ ) return;
-
-    // No compiled support for specified API value.  Issue a debug
-    // warning and continue as if no API was specified.
-    std::cerr << "\nRtAudio: no compiled support for specified API argument!\n" << std::endl;
-  }
-
-  // Iterate through the compiled APIs and return as soon as we find
-  // one with at least one device or we reach the end of the list.
-  std::vector< RtAudio::Api > apis;
-  getCompiledApi( apis );
-  for ( unsigned int i=0; i<apis.size(); i++ ) {
-    openRtApi( apis[i] );
-    if ( rtapi_ && rtapi_->getDeviceCount() ) break;
-  }
-
-  if ( rtapi_ ) return;
-
-  // It should not be possible to get here because the preprocessor
-  // definition __RTAUDIO_DUMMY__ is automatically defined if no
-  // API-specific definitions are passed to the compiler. But just in
-  // case something weird happens, we'll thow an error.
-  std::string errorText = "\nRtAudio: no compiled API support found ... critical error!!\n\n";
-  throw( RtAudioError( errorText, RtAudioError::UNSPECIFIED ) );
-}
-
-RtAudio :: ~RtAudio() throw()
-{
-  if ( rtapi_ )
-    delete rtapi_;
-}
-
-void RtAudio :: openStream( RtAudio::StreamParameters *outputParameters,
-                            RtAudio::StreamParameters *inputParameters,
-                            RtAudioFormat format, unsigned int sampleRate,
-                            unsigned int *bufferFrames,
-                            RtAudioCallback callback, void *userData,
-                            RtAudio::StreamOptions *options,
-                            RtAudioErrorCallback errorCallback )
-{
-  return rtapi_->openStream( outputParameters, inputParameters, format,
-                             sampleRate, bufferFrames, callback,
-                             userData, options, errorCallback );
-}
-
-// *************************************************** //
-//
-// Public RtApi definitions (see end of file for
-// private or protected utility functions).
-//
-// *************************************************** //
-
-RtApi :: RtApi()
-{
-  stream_.state = STREAM_CLOSED;
-  stream_.mode = UNINITIALIZED;
-  stream_.apiHandle = 0;
-  stream_.userBuffer[0] = 0;
-  stream_.userBuffer[1] = 0;
-  MUTEX_INITIALIZE( &stream_.mutex );
-  showWarnings_ = true;
-  firstErrorOccurred_ = false;
-}
-
-RtApi :: ~RtApi()
-{
-  MUTEX_DESTROY( &stream_.mutex );
-}
-
-void RtApi :: openStream( RtAudio::StreamParameters *oParams,
-                          RtAudio::StreamParameters *iParams,
-                          RtAudioFormat format, unsigned int sampleRate,
-                          unsigned int *bufferFrames,
-                          RtAudioCallback callback, void *userData,
-                          RtAudio::StreamOptions *options,
-                          RtAudioErrorCallback errorCallback )
-{
-  if ( stream_.state != STREAM_CLOSED ) {
-    errorText_ = "RtApi::openStream: a stream is already open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  // Clear stream information potentially left from a previously open stream.
-  clearStreamInfo();
-
-  if ( oParams && oParams->nChannels < 1 ) {
-    errorText_ = "RtApi::openStream: a non-NULL output StreamParameters structure cannot have an nChannels value less than one.";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  if ( iParams && iParams->nChannels < 1 ) {
-    errorText_ = "RtApi::openStream: a non-NULL input StreamParameters structure cannot have an nChannels value less than one.";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  if ( oParams == NULL && iParams == NULL ) {
-    errorText_ = "RtApi::openStream: input and output StreamParameters structures are both NULL!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  if ( formatBytes(format) == 0 ) {
-    errorText_ = "RtApi::openStream: 'format' parameter value is undefined.";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  unsigned int nDevices = getDeviceCount();
-  unsigned int oChannels = 0;
-  if ( oParams ) {
-    oChannels = oParams->nChannels;
-    if ( oParams->deviceId >= nDevices ) {
-      errorText_ = "RtApi::openStream: output device parameter value is invalid.";
-      error( RtAudioError::INVALID_USE );
-      return;
-    }
-  }
-
-  unsigned int iChannels = 0;
-  if ( iParams ) {
-    iChannels = iParams->nChannels;
-    if ( iParams->deviceId >= nDevices ) {
-      errorText_ = "RtApi::openStream: input device parameter value is invalid.";
-      error( RtAudioError::INVALID_USE );
-      return;
-    }
-  }
-
-  bool result;
-
-  if ( oChannels > 0 ) {
-
-    result = probeDeviceOpen( oParams->deviceId, OUTPUT, oChannels, oParams->firstChannel,
-                              sampleRate, format, bufferFrames, options );
-    if ( result == false ) {
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  if ( iChannels > 0 ) {
-
-    result = probeDeviceOpen( iParams->deviceId, INPUT, iChannels, iParams->firstChannel,
-                              sampleRate, format, bufferFrames, options );
-    if ( result == false ) {
-      if ( oChannels > 0 ) closeStream();
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  stream_.callbackInfo.callback = (void *) callback;
-  stream_.callbackInfo.userData = userData;
-  stream_.callbackInfo.errorCallback = (void *) errorCallback;
-
-  if ( options ) options->numberOfBuffers = stream_.nBuffers;
-  stream_.state = STREAM_STOPPED;
-}
-
-unsigned int RtApi :: getDefaultInputDevice( void )
-{
-  // Should be implemented in subclasses if possible.
-  return 0;
-}
-
-unsigned int RtApi :: getDefaultOutputDevice( void )
-{
-  // Should be implemented in subclasses if possible.
-  return 0;
-}
-
-void RtApi :: closeStream( void )
-{
-  // MUST be implemented in subclasses!
-  return;
-}
-
-bool RtApi :: probeDeviceOpen( unsigned int /*device*/, StreamMode /*mode*/, unsigned int /*channels*/,
-                               unsigned int /*firstChannel*/, unsigned int /*sampleRate*/,
-                               RtAudioFormat /*format*/, unsigned int * /*bufferSize*/,
-                               RtAudio::StreamOptions * /*options*/ )
-{
-  // MUST be implemented in subclasses!
-  return FAILURE;
-}
-
-void RtApi :: tickStreamTime( void )
-{
-  // Subclasses that do not provide their own implementation of
-  // getStreamTime should call this function once per buffer I/O to
-  // provide basic stream time support.
-
-  stream_.streamTime += ( stream_.bufferSize * 1.0 / stream_.sampleRate );
-
-#if defined( HAVE_GETTIMEOFDAY )
-  gettimeofday( &stream_.lastTickTimestamp, NULL );
-#endif
-}
-
-long RtApi :: getStreamLatency( void )
-{
-  verifyStream();
-
-  long totalLatency = 0;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
-    totalLatency = stream_.latency[0];
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
-    totalLatency += stream_.latency[1];
-
-  return totalLatency;
-}
-
-double RtApi :: getStreamTime( void )
-{
-  verifyStream();
-
-#if defined( HAVE_GETTIMEOFDAY )
-  // Return a very accurate estimate of the stream time by
-  // adding in the elapsed time since the last tick.
-  struct timeval then;
-  struct timeval now;
-
-  if ( stream_.state != STREAM_RUNNING || stream_.streamTime == 0.0 )
-    return stream_.streamTime;
-
-  gettimeofday( &now, NULL );
-  then = stream_.lastTickTimestamp;
-  return stream_.streamTime +
-    ((now.tv_sec + 0.000001 * now.tv_usec) -
-     (then.tv_sec + 0.000001 * then.tv_usec));     
-#else
-  return stream_.streamTime;
-#endif
-}
-
-void RtApi :: setStreamTime( double time )
-{
-  verifyStream();
-
-  if ( time >= 0.0 )
-    stream_.streamTime = time;
-}
-
-unsigned int RtApi :: getStreamSampleRate( void )
-{
- verifyStream();
-
- return stream_.sampleRate;
-}
-
-
-// *************************************************** //
-//
-// OS/API-specific methods.
-//
-// *************************************************** //
-
-#if defined(__MACOSX_CORE__)
-
-// The OS X CoreAudio API is designed to use a separate callback
-// procedure for each of its audio devices.  A single RtAudio duplex
-// stream using two different devices is supported here, though it
-// cannot be guaranteed to always behave correctly because we cannot
-// synchronize these two callbacks.
-//
-// A property listener is installed for over/underrun information.
-// However, no functionality is currently provided to allow property
-// listeners to trigger user handlers because it is unclear what could
-// be done if a critical stream parameter (buffer size, sample rate,
-// device disconnect) notification arrived.  The listeners entail
-// quite a bit of extra code and most likely, a user program wouldn't
-// be prepared for the result anyway.  However, we do provide a flag
-// to the client callback function to inform of an over/underrun.
-
-// A structure to hold various information related to the CoreAudio API
-// implementation.
-struct CoreHandle {
-  AudioDeviceID id[2];    // device ids
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
-  AudioDeviceIOProcID procId[2];
-#endif
-  UInt32 iStream[2];      // device stream index (or first if using multiple)
-  UInt32 nStreams[2];     // number of streams to use
-  bool xrun[2];
-  char *deviceBuffer;
-  pthread_cond_t condition;
-  int drainCounter;       // Tracks callback counts when draining
-  bool internalDrain;     // Indicates if stop is initiated from callback or not.
-
-  CoreHandle()
-    :deviceBuffer(0), drainCounter(0), internalDrain(false) { nStreams[0] = 1; nStreams[1] = 1; id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; }
-};
-
-RtApiCore:: RtApiCore()
-{
-#if defined( AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER )
-  // This is a largely undocumented but absolutely necessary
-  // requirement starting with OS-X 10.6.  If not called, queries and
-  // updates to various audio device properties are not handled
-  // correctly.
-  CFRunLoopRef theRunLoop = NULL;
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyRunLoop,
-                                          kAudioObjectPropertyScopeGlobal,
-                                          kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectSetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, sizeof(CFRunLoopRef), &theRunLoop);
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::RtApiCore: error setting run loop property!";
-    error( RtAudioError::WARNING );
-  }
-#endif
-}
-
-RtApiCore :: ~RtApiCore()
-{
-  // The subclass destructor gets called before the base class
-  // destructor, so close an existing stream before deallocating
-  // apiDeviceId memory.
-  if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-unsigned int RtApiCore :: getDeviceCount( void )
-{
-  // Find out how many audio devices there are, if any.
-  UInt32 dataSize;
-  AudioObjectPropertyAddress propertyAddress = { kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyDataSize( kAudioObjectSystemObject, &propertyAddress, 0, NULL, &dataSize );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDeviceCount: OS-X error getting device info!";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  return dataSize / sizeof( AudioDeviceID );
-}
-
-unsigned int RtApiCore :: getDefaultInputDevice( void )
-{
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices <= 1 ) return 0;
-
-  AudioDeviceID id;
-  UInt32 dataSize = sizeof( AudioDeviceID );
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  dataSize *= nDevices;
-  AudioDeviceID deviceList[ nDevices ];
-  property.mSelector = kAudioHardwarePropertyDevices;
-  result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device IDs.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  for ( unsigned int i=0; i<nDevices; i++ )
-    if ( id == deviceList[i] ) return i;
-
-  errorText_ = "RtApiCore::getDefaultInputDevice: No default device found!";
-  error( RtAudioError::WARNING );
-  return 0;
-}
-
-unsigned int RtApiCore :: getDefaultOutputDevice( void )
-{
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices <= 1 ) return 0;
-
-  AudioDeviceID id;
-  UInt32 dataSize = sizeof( AudioDeviceID );
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  dataSize = sizeof( AudioDeviceID ) * nDevices;
-  AudioDeviceID deviceList[ nDevices ];
-  property.mSelector = kAudioHardwarePropertyDevices;
-  result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device IDs.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  for ( unsigned int i=0; i<nDevices; i++ )
-    if ( id == deviceList[i] ) return i;
-
-  errorText_ = "RtApiCore::getDefaultOutputDevice: No default device found!";
-  error( RtAudioError::WARNING );
-  return 0;
-}
-
-RtAudio::DeviceInfo RtApiCore :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  // Get device ID
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices == 0 ) {
-    errorText_ = "RtApiCore::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    errorText_ = "RtApiCore::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  AudioDeviceID deviceList[ nDevices ];
-  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
-                                          kAudioObjectPropertyScopeGlobal,
-                                          kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
-                                                0, NULL, &dataSize, (void *) &deviceList );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::getDeviceInfo: OS-X system error getting device IDs.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  AudioDeviceID id = deviceList[ device ];
-
-  // Get the device name.
-  info.name.erase();
-  CFStringRef cfname;
-  dataSize = sizeof( CFStringRef );
-  property.mSelector = kAudioObjectPropertyManufacturer;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device manufacturer.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  //const char *mname = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() );
-  int length = CFStringGetLength(cfname);
-  char *mname = (char *)malloc(length * 3 + 1);
-#if defined( UNICODE ) || defined( _UNICODE )
-  CFStringGetCString(cfname, mname, length * 3 + 1, kCFStringEncodingUTF8);
-#else
-  CFStringGetCString(cfname, mname, length * 3 + 1, CFStringGetSystemEncoding());
-#endif
-  info.name.append( (const char *)mname, strlen(mname) );
-  info.name.append( ": " );
-  CFRelease( cfname );
-  free(mname);
-
-  property.mSelector = kAudioObjectPropertyName;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device name.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  //const char *name = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() );
-  length = CFStringGetLength(cfname);
-  char *name = (char *)malloc(length * 3 + 1);
-#if defined( UNICODE ) || defined( _UNICODE )
-  CFStringGetCString(cfname, name, length * 3 + 1, kCFStringEncodingUTF8);
-#else
-  CFStringGetCString(cfname, name, length * 3 + 1, CFStringGetSystemEncoding());
-#endif
-  info.name.append( (const char *)name, strlen(name) );
-  CFRelease( cfname );
-  free(name);
-
-  // Get the output stream "configuration".
-  AudioBufferList	*bufferList = nil;
-  property.mSelector = kAudioDevicePropertyStreamConfiguration;
-  property.mScope = kAudioDevicePropertyScopeOutput;
-  //  property.mElement = kAudioObjectPropertyElementWildcard;
-  dataSize = 0;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  if ( result != noErr || dataSize == 0 ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration info for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Allocate the AudioBufferList.
-  bufferList = (AudioBufferList *) malloc( dataSize );
-  if ( bufferList == NULL ) {
-    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating output AudioBufferList.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
-  if ( result != noErr || dataSize == 0 ) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Get output channel information.
-  unsigned int i, nStreams = bufferList->mNumberBuffers;
-  for ( i=0; i<nStreams; i++ )
-    info.outputChannels += bufferList->mBuffers[i].mNumberChannels;
-  free( bufferList );
-
-  // Get the input stream "configuration".
-  property.mScope = kAudioDevicePropertyScopeInput;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  if ( result != noErr || dataSize == 0 ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration info for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Allocate the AudioBufferList.
-  bufferList = (AudioBufferList *) malloc( dataSize );
-  if ( bufferList == NULL ) {
-    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating input AudioBufferList.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
-  if (result != noErr || dataSize == 0) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Get input channel information.
-  nStreams = bufferList->mNumberBuffers;
-  for ( i=0; i<nStreams; i++ )
-    info.inputChannels += bufferList->mBuffers[i].mNumberChannels;
-  free( bufferList );
-
-  // If device opens for both playback and capture, we determine the channels.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  // Probe the device sample rates.
-  bool isInput = false;
-  if ( info.outputChannels == 0 ) isInput = true;
-
-  // Determine the supported sample rates.
-  property.mSelector = kAudioDevicePropertyAvailableNominalSampleRates;
-  if ( isInput == false ) property.mScope = kAudioDevicePropertyScopeOutput;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  if ( result != kAudioHardwareNoError || dataSize == 0 ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rate info.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  UInt32 nRanges = dataSize / sizeof( AudioValueRange );
-  AudioValueRange rangeList[ nRanges ];
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &rangeList );
-  if ( result != kAudioHardwareNoError ) {
-    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rates.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // The sample rate reporting mechanism is a bit of a mystery.  It
-  // seems that it can either return individual rates or a range of
-  // rates.  I assume that if the min / max range values are the same,
-  // then that represents a single supported rate and if the min / max
-  // range values are different, the device supports an arbitrary
-  // range of values (though there might be multiple ranges, so we'll
-  // use the most conservative range).
-  Float64 minimumRate = 1.0, maximumRate = 10000000000.0;
-  bool haveValueRange = false;
-  info.sampleRates.clear();
-  for ( UInt32 i=0; i<nRanges; i++ ) {
-    if ( rangeList[i].mMinimum == rangeList[i].mMaximum ) {
-      unsigned int tmpSr = (unsigned int) rangeList[i].mMinimum;
-      info.sampleRates.push_back( tmpSr );
-
-      if ( !info.preferredSampleRate || ( tmpSr <= 48000 && tmpSr > info.preferredSampleRate ) )
-        info.preferredSampleRate = tmpSr;
-
-    } else {
-      haveValueRange = true;
-      if ( rangeList[i].mMinimum > minimumRate ) minimumRate = rangeList[i].mMinimum;
-      if ( rangeList[i].mMaximum < maximumRate ) maximumRate = rangeList[i].mMaximum;
-    }
-  }
-
-  if ( haveValueRange ) {
-    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-      if ( SAMPLE_RATES[k] >= (unsigned int) minimumRate && SAMPLE_RATES[k] <= (unsigned int) maximumRate ) {
-        info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-          info.preferredSampleRate = SAMPLE_RATES[k];
-      }
-    }
-  }
-
-  // Sort and remove any redundant values
-  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
-  info.sampleRates.erase( unique( info.sampleRates.begin(), info.sampleRates.end() ), info.sampleRates.end() );
-
-  if ( info.sampleRates.size() == 0 ) {
-    errorStream_ << "RtApiCore::probeDeviceInfo: No supported sample rates found for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // CoreAudio always uses 32-bit floating point data for PCM streams.
-  // Thus, any other "physical" formats supported by the device are of
-  // no interest to the client.
-  info.nativeFormats = RTAUDIO_FLOAT32;
-
-  if ( info.outputChannels > 0 )
-    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
-  if ( info.inputChannels > 0 )
-    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
-
-  info.probed = true;
-  return info;
-}
-
-static OSStatus callbackHandler( AudioDeviceID inDevice,
-                                 const AudioTimeStamp* /*inNow*/,
-                                 const AudioBufferList* inInputData,
-                                 const AudioTimeStamp* /*inInputTime*/,
-                                 AudioBufferList* outOutputData,
-                                 const AudioTimeStamp* /*inOutputTime*/,
-                                 void* infoPointer )
-{
-  CallbackInfo *info = (CallbackInfo *) infoPointer;
-
-  RtApiCore *object = (RtApiCore *) info->object;
-  if ( object->callbackEvent( inDevice, inInputData, outOutputData ) == false )
-    return kAudioHardwareUnspecifiedError;
-  else
-    return kAudioHardwareNoError;
-}
-
-static OSStatus xrunListener( AudioObjectID /*inDevice*/,
-                              UInt32 nAddresses,
-                              const AudioObjectPropertyAddress properties[],
-                              void* handlePointer )
-{
-  CoreHandle *handle = (CoreHandle *) handlePointer;
-  for ( UInt32 i=0; i<nAddresses; i++ ) {
-    if ( properties[i].mSelector == kAudioDeviceProcessorOverload ) {
-      if ( properties[i].mScope == kAudioDevicePropertyScopeInput )
-        handle->xrun[1] = true;
-      else
-        handle->xrun[0] = true;
-    }
-  }
-
-  return kAudioHardwareNoError;
-}
-
-static OSStatus rateListener( AudioObjectID inDevice,
-                              UInt32 /*nAddresses*/,
-                              const AudioObjectPropertyAddress /*properties*/[],
-                              void* ratePointer )
-{
-  Float64 *rate = (Float64 *) ratePointer;
-  UInt32 dataSize = sizeof( Float64 );
-  AudioObjectPropertyAddress property = { kAudioDevicePropertyNominalSampleRate,
-                                          kAudioObjectPropertyScopeGlobal,
-                                          kAudioObjectPropertyElementMaster };
-  AudioObjectGetPropertyData( inDevice, &property, 0, NULL, &dataSize, rate );
-  return kAudioHardwareNoError;
-}
-
-bool RtApiCore :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int *bufferSize,
-                                   RtAudio::StreamOptions *options )
-{
-  // Get device ID
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices == 0 ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiCore::probeDeviceOpen: no devices found!";
-    return FAILURE;
-  }
-
-  if ( device >= nDevices ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiCore::probeDeviceOpen: device ID is invalid!";
-    return FAILURE;
-  }
-
-  AudioDeviceID deviceList[ nDevices ];
-  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
-  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
-                                          kAudioObjectPropertyScopeGlobal,
-                                          kAudioObjectPropertyElementMaster };
-  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
-                                                0, NULL, &dataSize, (void *) &deviceList );
-  if ( result != noErr ) {
-    errorText_ = "RtApiCore::probeDeviceOpen: OS-X system error getting device IDs.";
-    return FAILURE;
-  }
-
-  AudioDeviceID id = deviceList[ device ];
-
-  // Setup for stream mode.
-  bool isInput = false;
-  if ( mode == INPUT ) {
-    isInput = true;
-    property.mScope = kAudioDevicePropertyScopeInput;
-  }
-  else
-    property.mScope = kAudioDevicePropertyScopeOutput;
-
-  // Get the stream "configuration".
-  AudioBufferList	*bufferList = nil;
-  dataSize = 0;
-  property.mSelector = kAudioDevicePropertyStreamConfiguration;
-  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
-  if ( result != noErr || dataSize == 0 ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration info for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Allocate the AudioBufferList.
-  bufferList = (AudioBufferList *) malloc( dataSize );
-  if ( bufferList == NULL ) {
-    errorText_ = "RtApiCore::probeDeviceOpen: memory error allocating AudioBufferList.";
-    return FAILURE;
-  }
-
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
-  if (result != noErr || dataSize == 0) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Search for one or more streams that contain the desired number of
-  // channels. CoreAudio devices can have an arbitrary number of
-  // streams and each stream can have an arbitrary number of channels.
-  // For each stream, a single buffer of interleaved samples is
-  // provided.  RtAudio prefers the use of one stream of interleaved
-  // data or multiple consecutive single-channel streams.  However, we
-  // now support multiple consecutive multi-channel streams of
-  // interleaved data as well.
-  UInt32 iStream, offsetCounter = firstChannel;
-  UInt32 nStreams = bufferList->mNumberBuffers;
-  bool monoMode = false;
-  bool foundStream = false;
-
-  // First check that the device supports the requested number of
-  // channels.
-  UInt32 deviceChannels = 0;
-  for ( iStream=0; iStream<nStreams; iStream++ )
-    deviceChannels += bufferList->mBuffers[iStream].mNumberChannels;
-
-  if ( deviceChannels < ( channels + firstChannel ) ) {
-    free( bufferList );
-    errorStream_ << "RtApiCore::probeDeviceOpen: the device (" << device << ") does not support the requested channel count.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Look for a single stream meeting our needs.
-  UInt32 firstStream, streamCount = 1, streamChannels = 0, channelOffset = 0;
-  for ( iStream=0; iStream<nStreams; iStream++ ) {
-    streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
-    if ( streamChannels >= channels + offsetCounter ) {
-      firstStream = iStream;
-      channelOffset = offsetCounter;
-      foundStream = true;
-      break;
-    }
-    if ( streamChannels > offsetCounter ) break;
-    offsetCounter -= streamChannels;
-  }
-
-  // If we didn't find a single stream above, then we should be able
-  // to meet the channel specification with multiple streams.
-  if ( foundStream == false ) {
-    monoMode = true;
-    offsetCounter = firstChannel;
-    for ( iStream=0; iStream<nStreams; iStream++ ) {
-      streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
-      if ( streamChannels > offsetCounter ) break;
-      offsetCounter -= streamChannels;
-    }
-
-    firstStream = iStream;
-    channelOffset = offsetCounter;
-    Int32 channelCounter = channels + offsetCounter - streamChannels;
-
-    if ( streamChannels > 1 ) monoMode = false;
-    while ( channelCounter > 0 ) {
-      streamChannels = bufferList->mBuffers[++iStream].mNumberChannels;
-      if ( streamChannels > 1 ) monoMode = false;
-      channelCounter -= streamChannels;
-      streamCount++;
-    }
-  }
-
-  free( bufferList );
-
-  // Determine the buffer size.
-  AudioValueRange	bufferRange;
-  dataSize = sizeof( AudioValueRange );
-  property.mSelector = kAudioDevicePropertyBufferFrameSizeRange;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &bufferRange );
-
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting buffer size range for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  if ( bufferRange.mMinimum > *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMinimum;
-  else if ( bufferRange.mMaximum < *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMaximum;
-  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) *bufferSize = (unsigned long) bufferRange.mMinimum;
-
-  // Set the buffer size.  For multiple streams, I'm assuming we only
-  // need to make this setting for the master channel.
-  UInt32 theSize = (UInt32) *bufferSize;
-  dataSize = sizeof( UInt32 );
-  property.mSelector = kAudioDevicePropertyBufferFrameSize;
-  result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &theSize );
-
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting the buffer size for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // If attempting to setup a duplex stream, the bufferSize parameter
-  // MUST be the same in both directions!
-  *bufferSize = theSize;
-  if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  stream_.bufferSize = *bufferSize;
-  stream_.nBuffers = 1;
-
-  // Try to set "hog" mode ... it's not clear to me this is working.
-  if ( options && options->flags & RTAUDIO_HOG_DEVICE ) {
-    pid_t hog_pid;
-    dataSize = sizeof( hog_pid );
-    property.mSelector = kAudioDevicePropertyHogMode;
-    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &hog_pid );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting 'hog' state!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    if ( hog_pid != getpid() ) {
-      hog_pid = getpid();
-      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &hog_pid );
-      if ( result != noErr ) {
-        errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting 'hog' state!";
-        errorText_ = errorStream_.str();
-        return FAILURE;
-      }
-    }
-  }
-
-  // Check and if necessary, change the sample rate for the device.
-  Float64 nominalRate;
-  dataSize = sizeof( Float64 );
-  property.mSelector = kAudioDevicePropertyNominalSampleRate;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &nominalRate );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting current sample rate.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Only change the sample rate if off by more than 1 Hz.
-  if ( fabs( nominalRate - (double)sampleRate ) > 1.0 ) {
-
-    // Set a property listener for the sample rate change
-    Float64 reportedRate = 0.0;
-    AudioObjectPropertyAddress tmp = { kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
-    result = AudioObjectAddPropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate property listener for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    nominalRate = (Float64) sampleRate;
-    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &nominalRate );
-    if ( result != noErr ) {
-      AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Now wait until the reported nominal rate is what we just set.
-    UInt32 microCounter = 0;
-    while ( reportedRate != nominalRate ) {
-      microCounter += 5000;
-      if ( microCounter > 5000000 ) break;
-      usleep( 5000 );
-    }
-
-    // Remove the property listener.
-    AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
-
-    if ( microCounter > 5000000 ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: timeout waiting for sample rate update for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // Now set the stream format for all streams.  Also, check the
-  // physical format of the device and change that if necessary.
-  AudioStreamBasicDescription	description;
-  dataSize = sizeof( AudioStreamBasicDescription );
-  property.mSelector = kAudioStreamPropertyVirtualFormat;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream format for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Set the sample rate and data format id.  However, only make the
-  // change if the sample rate is not within 1.0 of the desired
-  // rate and the format is not linear pcm.
-  bool updateFormat = false;
-  if ( fabs( description.mSampleRate - (Float64)sampleRate ) > 1.0 ) {
-    description.mSampleRate = (Float64) sampleRate;
-    updateFormat = true;
-  }
-
-  if ( description.mFormatID != kAudioFormatLinearPCM ) {
-    description.mFormatID = kAudioFormatLinearPCM;
-    updateFormat = true;
-  }
-
-  if ( updateFormat ) {
-    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &description );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate or data format for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // Now check the physical format.
-  property.mSelector = kAudioStreamPropertyPhysicalFormat;
-  result = AudioObjectGetPropertyData( id, &property, 0, NULL,  &dataSize, &description );
-  if ( result != noErr ) {
-    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream physical format for device (" << device << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  //std::cout << "Current physical stream format:" << std::endl;
-  //std::cout << "   mBitsPerChan = " << description.mBitsPerChannel << std::endl;
-  //std::cout << "   aligned high = " << (description.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (description.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
-  //std::cout << "   bytesPerFrame = " << description.mBytesPerFrame << std::endl;
-  //std::cout << "   sample rate = " << description.mSampleRate << std::endl;
-
-  if ( description.mFormatID != kAudioFormatLinearPCM || description.mBitsPerChannel < 16 ) {
-    description.mFormatID = kAudioFormatLinearPCM;
-    //description.mSampleRate = (Float64) sampleRate;
-    AudioStreamBasicDescription	testDescription = description;
-    UInt32 formatFlags;
-
-    // We'll try higher bit rates first and then work our way down.
-    std::vector< std::pair<UInt32, UInt32>  > physicalFormats;
-    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsFloat) & ~kLinearPCMFormatFlagIsSignedInteger;
-    physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
-    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
-    physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
-    physicalFormats.push_back( std::pair<Float32, UInt32>( 24, formatFlags ) );   // 24-bit packed
-    formatFlags &= ~( kAudioFormatFlagIsPacked | kAudioFormatFlagIsAlignedHigh );
-    physicalFormats.push_back( std::pair<Float32, UInt32>( 24.2, formatFlags ) ); // 24-bit in 4 bytes, aligned low
-    formatFlags |= kAudioFormatFlagIsAlignedHigh;
-    physicalFormats.push_back( std::pair<Float32, UInt32>( 24.4, formatFlags ) ); // 24-bit in 4 bytes, aligned high
-    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
-    physicalFormats.push_back( std::pair<Float32, UInt32>( 16, formatFlags ) );
-    physicalFormats.push_back( std::pair<Float32, UInt32>( 8, formatFlags ) );
-
-    bool setPhysicalFormat = false;
-    for( unsigned int i=0; i<physicalFormats.size(); i++ ) {
-      testDescription = description;
-      testDescription.mBitsPerChannel = (UInt32) physicalFormats[i].first;
-      testDescription.mFormatFlags = physicalFormats[i].second;
-      if ( (24 == (UInt32)physicalFormats[i].first) && ~( physicalFormats[i].second & kAudioFormatFlagIsPacked ) )
-        testDescription.mBytesPerFrame =  4 * testDescription.mChannelsPerFrame;
-      else
-        testDescription.mBytesPerFrame =  testDescription.mBitsPerChannel/8 * testDescription.mChannelsPerFrame;
-      testDescription.mBytesPerPacket = testDescription.mBytesPerFrame * testDescription.mFramesPerPacket;
-      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &testDescription );
-      if ( result == noErr ) {
-        setPhysicalFormat = true;
-        //std::cout << "Updated physical stream format:" << std::endl;
-        //std::cout << "   mBitsPerChan = " << testDescription.mBitsPerChannel << std::endl;
-        //std::cout << "   aligned high = " << (testDescription.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (testDescription.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
-        //std::cout << "   bytesPerFrame = " << testDescription.mBytesPerFrame << std::endl;
-        //std::cout << "   sample rate = " << testDescription.mSampleRate << std::endl;
-        break;
-      }
-    }
-
-    if ( !setPhysicalFormat ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting physical data format for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  } // done setting virtual/physical formats.
-
-  // Get the stream / device latency.
-  UInt32 latency;
-  dataSize = sizeof( UInt32 );
-  property.mSelector = kAudioDevicePropertyLatency;
-  if ( AudioObjectHasProperty( id, &property ) == true ) {
-    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &latency );
-    if ( result == kAudioHardwareNoError ) stream_.latency[ mode ] = latency;
-    else {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting device latency for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-    }
-  }
-
-  // Byte-swapping: According to AudioHardware.h, the stream data will
-  // always be presented in native-endian format, so we should never
-  // need to byte swap.
-  stream_.doByteSwap[mode] = false;
-
-  // From the CoreAudio documentation, PCM data must be supplied as
-  // 32-bit floats.
-  stream_.userFormat = format;
-  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-
-  if ( streamCount == 1 )
-    stream_.nDeviceChannels[mode] = description.mChannelsPerFrame;
-  else // multiple streams
-    stream_.nDeviceChannels[mode] = channels;
-  stream_.nUserChannels[mode] = channels;
-  stream_.channelOffset[mode] = channelOffset;  // offset within a CoreAudio stream
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] = true;
-  if ( monoMode == true ) stream_.deviceInterleaved[mode] = false;
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( streamCount == 1 ) {
-    if ( stream_.nUserChannels[mode] > 1 &&
-         stream_.userInterleaved != stream_.deviceInterleaved[mode] )
-      stream_.doConvertBuffer[mode] = true;
-  }
-  else if ( monoMode && stream_.userInterleaved )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate our CoreHandle structure for the stream.
-  CoreHandle *handle = 0;
-  if ( stream_.apiHandle == 0 ) {
-    try {
-      handle = new CoreHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiCore::probeDeviceOpen: error allocating CoreHandle memory.";
-      goto error;
-    }
-
-    if ( pthread_cond_init( &handle->condition, NULL ) ) {
-      errorText_ = "RtApiCore::probeDeviceOpen: error initializing pthread condition variable.";
-      goto error;
-    }
-    stream_.apiHandle = (void *) handle;
-  }
-  else
-    handle = (CoreHandle *) stream_.apiHandle;
-  handle->iStream[mode] = firstStream;
-  handle->nStreams[mode] = streamCount;
-  handle->id[mode] = id;
-
-  // Allocate necessary internal buffers.
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  //  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  stream_.userBuffer[mode] = (char *) malloc( bufferBytes * sizeof(char) );
-  memset( stream_.userBuffer[mode], 0, bufferBytes * sizeof(char) );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiCore::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  // If possible, we will make use of the CoreAudio stream buffers as
-  // "device buffers".  However, we can't do this if using multiple
-  // streams.
-  if ( stream_.doConvertBuffer[mode] && handle->nStreams[mode] > 1 ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiCore::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  stream_.sampleRate = sampleRate;
-  stream_.device[mode] = device;
-  stream_.state = STREAM_STOPPED;
-  stream_.callbackInfo.object = (void *) this;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) {
-    if ( streamCount > 1 ) setConvertInfo( mode, 0 );
-    else setConvertInfo( mode, channelOffset );
-  }
-
-  if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device )
-    // Only one callback procedure per device.
-    stream_.mode = DUPLEX;
-  else {
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
-    result = AudioDeviceCreateIOProcID( id, callbackHandler, (void *) &stream_.callbackInfo, &handle->procId[mode] );
-#else
-    // deprecated in favor of AudioDeviceCreateIOProcID()
-    result = AudioDeviceAddIOProc( id, callbackHandler, (void *) &stream_.callbackInfo );
-#endif
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::probeDeviceOpen: system error setting callback for device (" << device << ").";
-      errorText_ = errorStream_.str();
-      goto error;
-    }
-    if ( stream_.mode == OUTPUT && mode == INPUT )
-      stream_.mode = DUPLEX;
-    else
-      stream_.mode = mode;
-  }
-
-  // Setup the device property listener for over/underload.
-  property.mSelector = kAudioDeviceProcessorOverload;
-  property.mScope = kAudioObjectPropertyScopeGlobal;
-  result = AudioObjectAddPropertyListener( id, &property, xrunListener, (void *) handle );
-
-  return SUCCESS;
-
- error:
-  if ( handle ) {
-    pthread_cond_destroy( &handle->condition );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  return FAILURE;
-}
-
-void RtApiCore :: closeStream( void )
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiCore::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    if (handle) {
-      AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
-        kAudioObjectPropertyScopeGlobal,
-        kAudioObjectPropertyElementMaster };
-
-      property.mSelector = kAudioDeviceProcessorOverload;
-      property.mScope = kAudioObjectPropertyScopeGlobal;
-      if (AudioObjectRemovePropertyListener( handle->id[0], &property, xrunListener, (void *) handle ) != noErr) {
-        errorText_ = "RtApiCore::closeStream(): error removing property listener!";
-        error( RtAudioError::WARNING );
-      }
-    }
-    if ( stream_.state == STREAM_RUNNING )
-      AudioDeviceStop( handle->id[0], callbackHandler );
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
-    AudioDeviceDestroyIOProcID( handle->id[0], handle->procId[0] );
-#else
-    // deprecated in favor of AudioDeviceDestroyIOProcID()
-    AudioDeviceRemoveIOProc( handle->id[0], callbackHandler );
-#endif
-  }
-
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
-    if (handle) {
-      AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
-        kAudioObjectPropertyScopeGlobal,
-        kAudioObjectPropertyElementMaster };
-
-      property.mSelector = kAudioDeviceProcessorOverload;
-      property.mScope = kAudioObjectPropertyScopeGlobal;
-      if (AudioObjectRemovePropertyListener( handle->id[1], &property, xrunListener, (void *) handle ) != noErr) {
-        errorText_ = "RtApiCore::closeStream(): error removing property listener!";
-        error( RtAudioError::WARNING );
-      }
-    }
-    if ( stream_.state == STREAM_RUNNING )
-      AudioDeviceStop( handle->id[1], callbackHandler );
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
-    AudioDeviceDestroyIOProcID( handle->id[1], handle->procId[1] );
-#else
-    // deprecated in favor of AudioDeviceDestroyIOProcID()
-    AudioDeviceRemoveIOProc( handle->id[1], callbackHandler );
-#endif
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  // Destroy pthread condition variable.
-  pthread_cond_destroy( &handle->condition );
-  delete handle;
-  stream_.apiHandle = 0;
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-void RtApiCore :: startStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiCore::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  OSStatus result = noErr;
-  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    result = AudioDeviceStart( handle->id[0], callbackHandler );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  if ( stream_.mode == INPUT ||
-       ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
-
-    result = AudioDeviceStart( handle->id[1], callbackHandler );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::startStream: system error starting input callback procedure on device (" << stream_.device[1] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  handle->drainCounter = 0;
-  handle->internalDrain = false;
-  stream_.state = STREAM_RUNNING;
-
- unlock:
-  if ( result == noErr ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiCore :: stopStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiCore::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  OSStatus result = noErr;
-  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    if ( handle->drainCounter == 0 ) {
-      handle->drainCounter = 2;
-      pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled
-    }
-
-    result = AudioDeviceStop( handle->id[0], callbackHandler );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
-
-    result = AudioDeviceStop( handle->id[1], callbackHandler );
-    if ( result != noErr ) {
-      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping input callback procedure on device (" << stream_.device[1] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-
- unlock:
-  if ( result == noErr ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiCore :: abortStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiCore::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-  handle->drainCounter = 2;
-
-  stopStream();
-}
-
-// This function will be called by a spawned thread when the user
-// callback function signals that the stream should be stopped or
-// aborted.  It is better to handle it this way because the
-// callbackEvent() function probably should return before the AudioDeviceStop()
-// function is called.
-static void *coreStopStream( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiCore *object = (RtApiCore *) info->object;
-
-  object->stopStream();
-  pthread_exit( NULL );
-}
-
-bool RtApiCore :: callbackEvent( AudioDeviceID deviceId,
-                                 const AudioBufferList *inBufferList,
-                                 const AudioBufferList *outBufferList )
-{
-  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return FAILURE;
-  }
-
-  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
-  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-
-  // Check if we were draining the stream and signal is finished.
-  if ( handle->drainCounter > 3 ) {
-    ThreadHandle threadId;
-
-    stream_.state = STREAM_STOPPING;
-    if ( handle->internalDrain == true )
-      pthread_create( &threadId, NULL, coreStopStream, info );
-    else // external call to stopStream()
-      pthread_cond_signal( &handle->condition );
-    return SUCCESS;
-  }
-
-  AudioDeviceID outputDevice = handle->id[0];
-
-  // Invoke user callback to get fresh output data UNLESS we are
-  // draining stream or duplex mode AND the input/output devices are
-  // different AND this function is called for the input device.
-  if ( handle->drainCounter == 0 && ( stream_.mode != DUPLEX || deviceId == outputDevice ) ) {
-    RtAudioCallback callback = (RtAudioCallback) info->callback;
-    double streamTime = getStreamTime();
-    RtAudioStreamStatus status = 0;
-    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
-      status |= RTAUDIO_OUTPUT_UNDERFLOW;
-      handle->xrun[0] = false;
-    }
-    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
-      status |= RTAUDIO_INPUT_OVERFLOW;
-      handle->xrun[1] = false;
-    }
-
-    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                                  stream_.bufferSize, streamTime, status, info->userData );
-    if ( cbReturnValue == 2 ) {
-      stream_.state = STREAM_STOPPING;
-      handle->drainCounter = 2;
-      abortStream();
-      return SUCCESS;
-    }
-    else if ( cbReturnValue == 1 ) {
-      handle->drainCounter = 1;
-      handle->internalDrain = true;
-    }
-  }
-
-  if ( stream_.mode == OUTPUT || ( stream_.mode == DUPLEX && deviceId == outputDevice ) ) {
-
-    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
-      if ( handle->nStreams[0] == 1 ) {
-        memset( outBufferList->mBuffers[handle->iStream[0]].mData,
-                0,
-                outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
-      }
-      else { // fill multiple streams with zeros
-        for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
-          memset( outBufferList->mBuffers[handle->iStream[0]+i].mData,
-                  0,
-                  outBufferList->mBuffers[handle->iStream[0]+i].mDataByteSize );
-        }
-      }
-    }
-    else if ( handle->nStreams[0] == 1 ) {
-      if ( stream_.doConvertBuffer[0] ) { // convert directly to CoreAudio stream buffer
-        convertBuffer( (char *) outBufferList->mBuffers[handle->iStream[0]].mData,
-                       stream_.userBuffer[0], stream_.convertInfo[0] );
-      }
-      else { // copy from user buffer
-        memcpy( outBufferList->mBuffers[handle->iStream[0]].mData,
-                stream_.userBuffer[0],
-                outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
-      }
-    }
-    else { // fill multiple streams
-      Float32 *inBuffer = (Float32 *) stream_.userBuffer[0];
-      if ( stream_.doConvertBuffer[0] ) {
-        convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-        inBuffer = (Float32 *) stream_.deviceBuffer;
-      }
-
-      if ( stream_.deviceInterleaved[0] == false ) { // mono mode
-        UInt32 bufferBytes = outBufferList->mBuffers[handle->iStream[0]].mDataByteSize;
-        for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
-          memcpy( outBufferList->mBuffers[handle->iStream[0]+i].mData,
-                  (void *)&inBuffer[i*stream_.bufferSize], bufferBytes );
-        }
-      }
-      else { // fill multiple multi-channel streams with interleaved data
-        UInt32 streamChannels, channelsLeft, inJump, outJump, inOffset;
-        Float32 *out, *in;
-
-        bool inInterleaved = ( stream_.userInterleaved ) ? true : false;
-        UInt32 inChannels = stream_.nUserChannels[0];
-        if ( stream_.doConvertBuffer[0] ) {
-          inInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
-          inChannels = stream_.nDeviceChannels[0];
-        }
-
-        if ( inInterleaved ) inOffset = 1;
-        else inOffset = stream_.bufferSize;
-
-        channelsLeft = inChannels;
-        for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
-          in = inBuffer;
-          out = (Float32 *) outBufferList->mBuffers[handle->iStream[0]+i].mData;
-          streamChannels = outBufferList->mBuffers[handle->iStream[0]+i].mNumberChannels;
-
-          outJump = 0;
-          // Account for possible channel offset in first stream
-          if ( i == 0 && stream_.channelOffset[0] > 0 ) {
-            streamChannels -= stream_.channelOffset[0];
-            outJump = stream_.channelOffset[0];
-            out += outJump;
-          }
-
-          // Account for possible unfilled channels at end of the last stream
-          if ( streamChannels > channelsLeft ) {
-            outJump = streamChannels - channelsLeft;
-            streamChannels = channelsLeft;
-          }
-
-          // Determine input buffer offsets and skips
-          if ( inInterleaved ) {
-            inJump = inChannels;
-            in += inChannels - channelsLeft;
-          }
-          else {
-            inJump = 1;
-            in += (inChannels - channelsLeft) * inOffset;
-          }
-
-          for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
-            for ( unsigned int j=0; j<streamChannels; j++ ) {
-              *out++ = in[j*inOffset];
-            }
-            out += outJump;
-            in += inJump;
-          }
-          channelsLeft -= streamChannels;
-        }
-      }
-    }
-  }
-
-  // Don't bother draining input
-  if ( handle->drainCounter ) {
-    handle->drainCounter++;
-    goto unlock;
-  }
-
-  AudioDeviceID inputDevice;
-  inputDevice = handle->id[1];
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && deviceId == inputDevice ) ) {
-
-    if ( handle->nStreams[1] == 1 ) {
-      if ( stream_.doConvertBuffer[1] ) { // convert directly from CoreAudio stream buffer
-        convertBuffer( stream_.userBuffer[1],
-                       (char *) inBufferList->mBuffers[handle->iStream[1]].mData,
-                       stream_.convertInfo[1] );
-      }
-      else { // copy to user buffer
-        memcpy( stream_.userBuffer[1],
-                inBufferList->mBuffers[handle->iStream[1]].mData,
-                inBufferList->mBuffers[handle->iStream[1]].mDataByteSize );
-      }
-    }
-    else { // read from multiple streams
-      Float32 *outBuffer = (Float32 *) stream_.userBuffer[1];
-      if ( stream_.doConvertBuffer[1] ) outBuffer = (Float32 *) stream_.deviceBuffer;
-
-      if ( stream_.deviceInterleaved[1] == false ) { // mono mode
-        UInt32 bufferBytes = inBufferList->mBuffers[handle->iStream[1]].mDataByteSize;
-        for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
-          memcpy( (void *)&outBuffer[i*stream_.bufferSize],
-                  inBufferList->mBuffers[handle->iStream[1]+i].mData, bufferBytes );
-        }
-      }
-      else { // read from multiple multi-channel streams
-        UInt32 streamChannels, channelsLeft, inJump, outJump, outOffset;
-        Float32 *out, *in;
-
-        bool outInterleaved = ( stream_.userInterleaved ) ? true : false;
-        UInt32 outChannels = stream_.nUserChannels[1];
-        if ( stream_.doConvertBuffer[1] ) {
-          outInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
-          outChannels = stream_.nDeviceChannels[1];
-        }
-
-        if ( outInterleaved ) outOffset = 1;
-        else outOffset = stream_.bufferSize;
-
-        channelsLeft = outChannels;
-        for ( unsigned int i=0; i<handle->nStreams[1]; i++ ) {
-          out = outBuffer;
-          in = (Float32 *) inBufferList->mBuffers[handle->iStream[1]+i].mData;
-          streamChannels = inBufferList->mBuffers[handle->iStream[1]+i].mNumberChannels;
-
-          inJump = 0;
-          // Account for possible channel offset in first stream
-          if ( i == 0 && stream_.channelOffset[1] > 0 ) {
-            streamChannels -= stream_.channelOffset[1];
-            inJump = stream_.channelOffset[1];
-            in += inJump;
-          }
-
-          // Account for possible unread channels at end of the last stream
-          if ( streamChannels > channelsLeft ) {
-            inJump = streamChannels - channelsLeft;
-            streamChannels = channelsLeft;
-          }
-
-          // Determine output buffer offsets and skips
-          if ( outInterleaved ) {
-            outJump = outChannels;
-            out += outChannels - channelsLeft;
-          }
-          else {
-            outJump = 1;
-            out += (outChannels - channelsLeft) * outOffset;
-          }
-
-          for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
-            for ( unsigned int j=0; j<streamChannels; j++ ) {
-              out[j*outOffset] = *in++;
-            }
-            out += outJump;
-            in += inJump;
-          }
-          channelsLeft -= streamChannels;
-        }
-      }
-      
-      if ( stream_.doConvertBuffer[1] ) { // convert from our internal "device" buffer
-        convertBuffer( stream_.userBuffer[1],
-                       stream_.deviceBuffer,
-                       stream_.convertInfo[1] );
-      }
-    }
-  }
-
- unlock:
-  //MUTEX_UNLOCK( &stream_.mutex );
-
-  RtApi::tickStreamTime();
-  return SUCCESS;
-}
-
-const char* RtApiCore :: getErrorCode( OSStatus code )
-{
-  switch( code ) {
-
-  case kAudioHardwareNotRunningError:
-    return "kAudioHardwareNotRunningError";
-
-  case kAudioHardwareUnspecifiedError:
-    return "kAudioHardwareUnspecifiedError";
-
-  case kAudioHardwareUnknownPropertyError:
-    return "kAudioHardwareUnknownPropertyError";
-
-  case kAudioHardwareBadPropertySizeError:
-    return "kAudioHardwareBadPropertySizeError";
-
-  case kAudioHardwareIllegalOperationError:
-    return "kAudioHardwareIllegalOperationError";
-
-  case kAudioHardwareBadObjectError:
-    return "kAudioHardwareBadObjectError";
-
-  case kAudioHardwareBadDeviceError:
-    return "kAudioHardwareBadDeviceError";
-
-  case kAudioHardwareBadStreamError:
-    return "kAudioHardwareBadStreamError";
-
-  case kAudioHardwareUnsupportedOperationError:
-    return "kAudioHardwareUnsupportedOperationError";
-
-  case kAudioDeviceUnsupportedFormatError:
-    return "kAudioDeviceUnsupportedFormatError";
-
-  case kAudioDevicePermissionsError:
-    return "kAudioDevicePermissionsError";
-
-  default:
-    return "CoreAudio unknown error";
-  }
-}
-
-  //******************** End of __MACOSX_CORE__ *********************//
-#endif
-
-#if defined(__UNIX_JACK__)
-
-// JACK is a low-latency audio server, originally written for the
-// GNU/Linux operating system and now also ported to OS-X. It can
-// connect a number of different applications to an audio device, as
-// well as allowing them to share audio between themselves.
-//
-// When using JACK with RtAudio, "devices" refer to JACK clients that
-// have ports connected to the server.  The JACK server is typically
-// started in a terminal as follows:
-//
-// .jackd -d alsa -d hw:0
-//
-// or through an interface program such as qjackctl.  Many of the
-// parameters normally set for a stream are fixed by the JACK server
-// and can be specified when the JACK server is started.  In
-// particular,
-//
-// .jackd -d alsa -d hw:0 -r 44100 -p 512 -n 4
-//
-// specifies a sample rate of 44100 Hz, a buffer size of 512 sample
-// frames, and number of buffers = 4.  Once the server is running, it
-// is not possible to override these values.  If the values are not
-// specified in the command-line, the JACK server uses default values.
-//
-// The JACK server does not have to be running when an instance of
-// RtApiJack is created, though the function getDeviceCount() will
-// report 0 devices found until JACK has been started.  When no
-// devices are available (i.e., the JACK server is not running), a
-// stream cannot be opened.
-
-#include <jack/jack.h>
-#include <unistd.h>
-#include <cstdio>
-
-// A structure to hold various information related to the Jack API
-// implementation.
-struct JackHandle {
-  jack_client_t *client;
-  jack_port_t **ports[2];
-  std::string deviceName[2];
-  bool xrun[2];
-  pthread_cond_t condition;
-  int drainCounter;       // Tracks callback counts when draining
-  bool internalDrain;     // Indicates if stop is initiated from callback or not.
-
-  JackHandle()
-    :client(0), drainCounter(0), internalDrain(false) { ports[0] = 0; ports[1] = 0; xrun[0] = false; xrun[1] = false; }
-};
-
-static void jackSilentError( const char * ) {};
-
-RtApiJack :: RtApiJack()
-{
-  // Nothing to do here.
-#if !defined(__RTAUDIO_DEBUG__)
-  // Turn off Jack's internal error reporting.
-  jack_set_error_function( &jackSilentError );
-#endif
-}
-
-RtApiJack :: ~RtApiJack()
-{
-  if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-unsigned int RtApiJack :: getDeviceCount( void )
-{
-  // See if we can become a jack client.
-  jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
-  jack_status_t *status = NULL;
-  jack_client_t *client = jack_client_open( "RtApiJackCount", options, status );
-  if ( client == 0 ) return 0;
-
-  const char **ports;
-  std::string port, previousPort;
-  unsigned int nChannels = 0, nDevices = 0;
-  ports = jack_get_ports( client, NULL, NULL, 0 );
-  if ( ports ) {
-    // Parse the port names up to the first colon (:).
-    size_t iColon = 0;
-    do {
-      port = (char *) ports[ nChannels ];
-      iColon = port.find(":");
-      if ( iColon != std::string::npos ) {
-        port = port.substr( 0, iColon + 1 );
-        if ( port != previousPort ) {
-          nDevices++;
-          previousPort = port;
-        }
-      }
-    } while ( ports[++nChannels] );
-    free( ports );
-  }
-
-  jack_client_close( client );
-  return nDevices;
-}
-
-RtAudio::DeviceInfo RtApiJack :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption
-  jack_status_t *status = NULL;
-  jack_client_t *client = jack_client_open( "RtApiJackInfo", options, status );
-  if ( client == 0 ) {
-    errorText_ = "RtApiJack::getDeviceInfo: Jack server not found or connection error!";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  const char **ports;
-  std::string port, previousPort;
-  unsigned int nPorts = 0, nDevices = 0;
-  ports = jack_get_ports( client, NULL, NULL, 0 );
-  if ( ports ) {
-    // Parse the port names up to the first colon (:).
-    size_t iColon = 0;
-    do {
-      port = (char *) ports[ nPorts ];
-      iColon = port.find(":");
-      if ( iColon != std::string::npos ) {
-        port = port.substr( 0, iColon );
-        if ( port != previousPort ) {
-          if ( nDevices == device ) info.name = port;
-          nDevices++;
-          previousPort = port;
-        }
-      }
-    } while ( ports[++nPorts] );
-    free( ports );
-  }
-
-  if ( device >= nDevices ) {
-    jack_client_close( client );
-    errorText_ = "RtApiJack::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  // Get the current jack server sample rate.
-  info.sampleRates.clear();
-
-  info.preferredSampleRate = jack_get_sample_rate( client );
-  info.sampleRates.push_back( info.preferredSampleRate );
-
-  // Count the available ports containing the client name as device
-  // channels.  Jack "input ports" equal RtAudio output channels.
-  unsigned int nChannels = 0;
-  ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsInput );
-  if ( ports ) {
-    while ( ports[ nChannels ] ) nChannels++;
-    free( ports );
-    info.outputChannels = nChannels;
-  }
-
-  // Jack "output ports" equal RtAudio input channels.
-  nChannels = 0;
-  ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsOutput );
-  if ( ports ) {
-    while ( ports[ nChannels ] ) nChannels++;
-    free( ports );
-    info.inputChannels = nChannels;
-  }
-
-  if ( info.outputChannels == 0 && info.inputChannels == 0 ) {
-    jack_client_close(client);
-    errorText_ = "RtApiJack::getDeviceInfo: error determining Jack input/output channels!";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // If device opens for both playback and capture, we determine the channels.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  // Jack always uses 32-bit floats.
-  info.nativeFormats = RTAUDIO_FLOAT32;
-
-  // Jack doesn't provide default devices so we'll use the first available one.
-  if ( device == 0 && info.outputChannels > 0 )
-    info.isDefaultOutput = true;
-  if ( device == 0 && info.inputChannels > 0 )
-    info.isDefaultInput = true;
-
-  jack_client_close(client);
-  info.probed = true;
-  return info;
-}
-
-static int jackCallbackHandler( jack_nframes_t nframes, void *infoPointer )
-{
-  CallbackInfo *info = (CallbackInfo *) infoPointer;
-
-  RtApiJack *object = (RtApiJack *) info->object;
-  if ( object->callbackEvent( (unsigned long) nframes ) == false ) return 1;
-
-  return 0;
-}
-
-// This function will be called by a spawned thread when the Jack
-// server signals that it is shutting down.  It is necessary to handle
-// it this way because the jackShutdown() function must return before
-// the jack_deactivate() function (in closeStream()) will return.
-static void *jackCloseStream( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiJack *object = (RtApiJack *) info->object;
-
-  object->closeStream();
-
-  pthread_exit( NULL );
-}
-static void jackShutdown( void *infoPointer )
-{
-  CallbackInfo *info = (CallbackInfo *) infoPointer;
-  RtApiJack *object = (RtApiJack *) info->object;
-
-  // Check current stream state.  If stopped, then we'll assume this
-  // was called as a result of a call to RtApiJack::stopStream (the
-  // deactivation of a client handle causes this function to be called).
-  // If not, we'll assume the Jack server is shutting down or some
-  // other problem occurred and we should close the stream.
-  if ( object->isStreamRunning() == false ) return;
-
-  ThreadHandle threadId;
-  pthread_create( &threadId, NULL, jackCloseStream, info );
-  std::cerr << "\nRtApiJack: the Jack server is shutting down this client ... stream stopped and closed!!\n" << std::endl;
-}
-
-static int jackXrun( void *infoPointer )
-{
-  JackHandle *handle = (JackHandle *) infoPointer;
-
-  if ( handle->ports[0] ) handle->xrun[0] = true;
-  if ( handle->ports[1] ) handle->xrun[1] = true;
-
-  return 0;
-}
-
-bool RtApiJack :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int *bufferSize,
-                                   RtAudio::StreamOptions *options )
-{
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-
-  // Look for jack server and try to become a client (only do once per stream).
-  jack_client_t *client = 0;
-  if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) {
-    jack_options_t jackoptions = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
-    jack_status_t *status = NULL;
-    if ( options && !options->streamName.empty() )
-      client = jack_client_open( options->streamName.c_str(), jackoptions, status );
-    else
-      client = jack_client_open( "RtApiJack", jackoptions, status );
-    if ( client == 0 ) {
-      errorText_ = "RtApiJack::probeDeviceOpen: Jack server not found or connection error!";
-      error( RtAudioError::WARNING );
-      return FAILURE;
-    }
-  }
-  else {
-    // The handle must have been created on an earlier pass.
-    client = handle->client;
-  }
-
-  const char **ports;
-  std::string port, previousPort, deviceName;
-  unsigned int nPorts = 0, nDevices = 0;
-  ports = jack_get_ports( client, NULL, NULL, 0 );
-  if ( ports ) {
-    // Parse the port names up to the first colon (:).
-    size_t iColon = 0;
-    do {
-      port = (char *) ports[ nPorts ];
-      iColon = port.find(":");
-      if ( iColon != std::string::npos ) {
-        port = port.substr( 0, iColon );
-        if ( port != previousPort ) {
-          if ( nDevices == device ) deviceName = port;
-          nDevices++;
-          previousPort = port;
-        }
-      }
-    } while ( ports[++nPorts] );
-    free( ports );
-  }
-
-  if ( device >= nDevices ) {
-    errorText_ = "RtApiJack::probeDeviceOpen: device ID is invalid!";
-    return FAILURE;
-  }
-
-  // Count the available ports containing the client name as device
-  // channels.  Jack "input ports" equal RtAudio output channels.
-  unsigned int nChannels = 0;
-  unsigned long flag = JackPortIsInput;
-  if ( mode == INPUT ) flag = JackPortIsOutput;
-  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
-  if ( ports ) {
-    while ( ports[ nChannels ] ) nChannels++;
-    free( ports );
-  }
-
-  // Compare the jack ports for specified client to the requested number of channels.
-  if ( nChannels < (channels + firstChannel) ) {
-    errorStream_ << "RtApiJack::probeDeviceOpen: requested number of channels (" << channels << ") + offset (" << firstChannel << ") not found for specified device (" << device << ":" << deviceName << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Check the jack server sample rate.
-  unsigned int jackRate = jack_get_sample_rate( client );
-  if ( sampleRate != jackRate ) {
-    jack_client_close( client );
-    errorStream_ << "RtApiJack::probeDeviceOpen: the requested sample rate (" << sampleRate << ") is different than the JACK server rate (" << jackRate << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-  stream_.sampleRate = jackRate;
-
-  // Get the latency of the JACK port.
-  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
-  if ( ports[ firstChannel ] ) {
-    // Added by Ge Wang
-    jack_latency_callback_mode_t cbmode = (mode == INPUT ? JackCaptureLatency : JackPlaybackLatency);
-    // the range (usually the min and max are equal)
-    jack_latency_range_t latrange; latrange.min = latrange.max = 0;
-    // get the latency range
-    jack_port_get_latency_range( jack_port_by_name( client, ports[firstChannel] ), cbmode, &latrange );
-    // be optimistic, use the min!
-    stream_.latency[mode] = latrange.min;
-    //stream_.latency[mode] = jack_port_get_latency( jack_port_by_name( client, ports[ firstChannel ] ) );
-  }
-  free( ports );
-
-  // The jack server always uses 32-bit floating-point data.
-  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-  stream_.userFormat = format;
-
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-
-  // Jack always uses non-interleaved buffers.
-  stream_.deviceInterleaved[mode] = false;
-
-  // Jack always provides host byte-ordered data.
-  stream_.doByteSwap[mode] = false;
-
-  // Get the buffer size.  The buffer size and number of buffers
-  // (periods) is set when the jack server is started.
-  stream_.bufferSize = (int) jack_get_buffer_size( client );
-  *bufferSize = stream_.bufferSize;
-
-  stream_.nDeviceChannels[mode] = channels;
-  stream_.nUserChannels[mode] = channels;
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate our JackHandle structure for the stream.
-  if ( handle == 0 ) {
-    try {
-      handle = new JackHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiJack::probeDeviceOpen: error allocating JackHandle memory.";
-      goto error;
-    }
-
-    if ( pthread_cond_init(&handle->condition, NULL) ) {
-      errorText_ = "RtApiJack::probeDeviceOpen: error initializing pthread condition variable.";
-      goto error;
-    }
-    stream_.apiHandle = (void *) handle;
-    handle->client = client;
-  }
-  handle->deviceName[mode] = deviceName;
-
-  // Allocate necessary internal buffers.
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiJack::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    if ( mode == OUTPUT )
-      bufferBytes = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-    else { // mode == INPUT
-      bufferBytes = stream_.nDeviceChannels[1] * formatBytes( stream_.deviceFormat[1] );
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes(stream_.deviceFormat[0]);
-        if ( bufferBytes < bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiJack::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  // Allocate memory for the Jack ports (channels) identifiers.
-  handle->ports[mode] = (jack_port_t **) malloc ( sizeof (jack_port_t *) * channels );
-  if ( handle->ports[mode] == NULL )  {
-    errorText_ = "RtApiJack::probeDeviceOpen: error allocating port memory.";
-    goto error;
-  }
-
-  stream_.device[mode] = device;
-  stream_.channelOffset[mode] = firstChannel;
-  stream_.state = STREAM_STOPPED;
-  stream_.callbackInfo.object = (void *) this;
-
-  if ( stream_.mode == OUTPUT && mode == INPUT )
-    // We had already set up the stream for output.
-    stream_.mode = DUPLEX;
-  else {
-    stream_.mode = mode;
-    jack_set_process_callback( handle->client, jackCallbackHandler, (void *) &stream_.callbackInfo );
-    jack_set_xrun_callback( handle->client, jackXrun, (void *) &handle );
-    jack_on_shutdown( handle->client, jackShutdown, (void *) &stream_.callbackInfo );
-  }
-
-  // Register our ports.
-  char label[64];
-  if ( mode == OUTPUT ) {
-    for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
-      snprintf( label, 64, "outport %d", i );
-      handle->ports[0][i] = jack_port_register( handle->client, (const char *)label,
-                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0 );
-    }
-  }
-  else {
-    for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
-      snprintf( label, 64, "inport %d", i );
-      handle->ports[1][i] = jack_port_register( handle->client, (const char *)label,
-                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0 );
-    }
-  }
-
-  // Setup the buffer conversion information structure.  We don't use
-  // buffers to do channel offsets, so we override that parameter
-  // here.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
-
-  return SUCCESS;
-
- error:
-  if ( handle ) {
-    pthread_cond_destroy( &handle->condition );
-    jack_client_close( handle->client );
-
-    if ( handle->ports[0] ) free( handle->ports[0] );
-    if ( handle->ports[1] ) free( handle->ports[1] );
-
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  return FAILURE;
-}
-
-void RtApiJack :: closeStream( void )
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiJack::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-  if ( handle ) {
-
-    if ( stream_.state == STREAM_RUNNING )
-      jack_deactivate( handle->client );
-
-    jack_client_close( handle->client );
-  }
-
-  if ( handle ) {
-    if ( handle->ports[0] ) free( handle->ports[0] );
-    if ( handle->ports[1] ) free( handle->ports[1] );
-    pthread_cond_destroy( &handle->condition );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-void RtApiJack :: startStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiJack::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-  int result = jack_activate( handle->client );
-  if ( result ) {
-    errorText_ = "RtApiJack::startStream(): unable to activate JACK client!";
-    goto unlock;
-  }
-
-  const char **ports;
-
-  // Get the list of available ports.
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    result = 1;
-    ports = jack_get_ports( handle->client, handle->deviceName[0].c_str(), NULL, JackPortIsInput);
-    if ( ports == NULL) {
-      errorText_ = "RtApiJack::startStream(): error determining available JACK input ports!";
-      goto unlock;
-    }
-
-    // Now make the port connections.  Since RtAudio wasn't designed to
-    // allow the user to select particular channels of a device, we'll
-    // just open the first "nChannels" ports with offset.
-    for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
-      result = 1;
-      if ( ports[ stream_.channelOffset[0] + i ] )
-        result = jack_connect( handle->client, jack_port_name( handle->ports[0][i] ), ports[ stream_.channelOffset[0] + i ] );
-      if ( result ) {
-        free( ports );
-        errorText_ = "RtApiJack::startStream(): error connecting output ports!";
-        goto unlock;
-      }
-    }
-    free(ports);
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-    result = 1;
-    ports = jack_get_ports( handle->client, handle->deviceName[1].c_str(), NULL, JackPortIsOutput );
-    if ( ports == NULL) {
-      errorText_ = "RtApiJack::startStream(): error determining available JACK output ports!";
-      goto unlock;
-    }
-
-    // Now make the port connections.  See note above.
-    for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
-      result = 1;
-      if ( ports[ stream_.channelOffset[1] + i ] )
-        result = jack_connect( handle->client, ports[ stream_.channelOffset[1] + i ], jack_port_name( handle->ports[1][i] ) );
-      if ( result ) {
-        free( ports );
-        errorText_ = "RtApiJack::startStream(): error connecting input ports!";
-        goto unlock;
-      }
-    }
-    free(ports);
-  }
-
-  handle->drainCounter = 0;
-  handle->internalDrain = false;
-  stream_.state = STREAM_RUNNING;
-
- unlock:
-  if ( result == 0 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiJack :: stopStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiJack::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    if ( handle->drainCounter == 0 ) {
-      handle->drainCounter = 2;
-      pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled
-    }
-  }
-
-  jack_deactivate( handle->client );
-  stream_.state = STREAM_STOPPED;
-}
-
-void RtApiJack :: abortStream( void )
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiJack::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-  handle->drainCounter = 2;
-
-  stopStream();
-}
-
-// This function will be called by a spawned thread when the user
-// callback function signals that the stream should be stopped or
-// aborted.  It is necessary to handle it this way because the
-// callbackEvent() function must return before the jack_deactivate()
-// function will return.
-static void *jackStopStream( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiJack *object = (RtApiJack *) info->object;
-
-  object->stopStream();
-  pthread_exit( NULL );
-}
-
-bool RtApiJack :: callbackEvent( unsigned long nframes )
-{
-  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return FAILURE;
-  }
-  if ( stream_.bufferSize != nframes ) {
-    errorText_ = "RtApiCore::callbackEvent(): the JACK buffer size has changed ... cannot process!";
-    error( RtAudioError::WARNING );
-    return FAILURE;
-  }
-
-  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
-  JackHandle *handle = (JackHandle *) stream_.apiHandle;
-
-  // Check if we were draining the stream and signal is finished.
-  if ( handle->drainCounter > 3 ) {
-    ThreadHandle threadId;
-
-    stream_.state = STREAM_STOPPING;
-    if ( handle->internalDrain == true )
-      pthread_create( &threadId, NULL, jackStopStream, info );
-    else
-      pthread_cond_signal( &handle->condition );
-    return SUCCESS;
-  }
-
-  // Invoke user callback first, to get fresh output data.
-  if ( handle->drainCounter == 0 ) {
-    RtAudioCallback callback = (RtAudioCallback) info->callback;
-    double streamTime = getStreamTime();
-    RtAudioStreamStatus status = 0;
-    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
-      status |= RTAUDIO_OUTPUT_UNDERFLOW;
-      handle->xrun[0] = false;
-    }
-    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
-      status |= RTAUDIO_INPUT_OVERFLOW;
-      handle->xrun[1] = false;
-    }
-    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                                  stream_.bufferSize, streamTime, status, info->userData );
-    if ( cbReturnValue == 2 ) {
-      stream_.state = STREAM_STOPPING;
-      handle->drainCounter = 2;
-      ThreadHandle id;
-      pthread_create( &id, NULL, jackStopStream, info );
-      return SUCCESS;
-    }
-    else if ( cbReturnValue == 1 ) {
-      handle->drainCounter = 1;
-      handle->internalDrain = true;
-    }
-  }
-
-  jack_default_audio_sample_t *jackbuffer;
-  unsigned long bufferBytes = nframes * sizeof( jack_default_audio_sample_t );
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
-      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
-        memset( jackbuffer, 0, bufferBytes );
-      }
-
-    }
-    else if ( stream_.doConvertBuffer[0] ) {
-
-      convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-
-      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
-        memcpy( jackbuffer, &stream_.deviceBuffer[i*bufferBytes], bufferBytes );
-      }
-    }
-    else { // no buffer conversion
-      for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
-        memcpy( jackbuffer, &stream_.userBuffer[0][i*bufferBytes], bufferBytes );
-      }
-    }
-  }
-
-  // Don't bother draining input
-  if ( handle->drainCounter ) {
-    handle->drainCounter++;
-    goto unlock;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    if ( stream_.doConvertBuffer[1] ) {
-      for ( unsigned int i=0; i<stream_.nDeviceChannels[1]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
-        memcpy( &stream_.deviceBuffer[i*bufferBytes], jackbuffer, bufferBytes );
-      }
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-    }
-    else { // no buffer conversion
-      for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
-        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
-        memcpy( &stream_.userBuffer[1][i*bufferBytes], jackbuffer, bufferBytes );
-      }
-    }
-  }
-
- unlock:
-  RtApi::tickStreamTime();
-  return SUCCESS;
-}
-  //******************** End of __UNIX_JACK__ *********************//
-#endif
-
-#if defined(__WINDOWS_ASIO__) // ASIO API on Windows
-
-// The ASIO API is designed around a callback scheme, so this
-// implementation is similar to that used for OS-X CoreAudio and Linux
-// Jack.  The primary constraint with ASIO is that it only allows
-// access to a single driver at a time.  Thus, it is not possible to
-// have more than one simultaneous RtAudio stream.
-//
-// This implementation also requires a number of external ASIO files
-// and a few global variables.  The ASIO callback scheme does not
-// allow for the passing of user data, so we must create a global
-// pointer to our callbackInfo structure.
-//
-// On unix systems, we make use of a pthread condition variable.
-// Since there is no equivalent in Windows, I hacked something based
-// on information found in
-// http://www.cs.wustl.edu/~schmidt/win32-cv-1.html.
-
-#include "asiosys.h"
-#include "asio.h"
-#include "iasiothiscallresolver.h"
-#include "asiodrivers.h"
-#include <cmath>
-
-static AsioDrivers drivers;
-static ASIOCallbacks asioCallbacks;
-static ASIODriverInfo driverInfo;
-static CallbackInfo *asioCallbackInfo;
-static bool asioXRun;
-
-struct AsioHandle {
-  int drainCounter;       // Tracks callback counts when draining
-  bool internalDrain;     // Indicates if stop is initiated from callback or not.
-  ASIOBufferInfo *bufferInfos;
-  HANDLE condition;
-
-  AsioHandle()
-    :drainCounter(0), internalDrain(false), bufferInfos(0) {}
-};
-
-// Function declarations (definitions at end of section)
-static const char* getAsioErrorString( ASIOError result );
-static void sampleRateChanged( ASIOSampleRate sRate );
-static long asioMessages( long selector, long value, void* message, double* opt );
-
-RtApiAsio :: RtApiAsio()
-{
-  // ASIO cannot run on a multi-threaded appartment. You can call
-  // CoInitialize beforehand, but it must be for appartment threading
-  // (in which case, CoInitilialize will return S_FALSE here).
-  coInitialized_ = false;
-  HRESULT hr = CoInitialize( NULL ); 
-  if ( FAILED(hr) ) {
-    errorText_ = "RtApiAsio::ASIO requires a single-threaded appartment. Call CoInitializeEx(0,COINIT_APARTMENTTHREADED)";
-    error( RtAudioError::WARNING );
-  }
-  coInitialized_ = true;
-
-  drivers.removeCurrentDriver();
-  driverInfo.asioVersion = 2;
-
-  // See note in DirectSound implementation about GetDesktopWindow().
-  driverInfo.sysRef = GetForegroundWindow();
-}
-
-RtApiAsio :: ~RtApiAsio()
-{
-  if ( stream_.state != STREAM_CLOSED ) closeStream();
-  if ( coInitialized_ ) CoUninitialize();
-}
-
-unsigned int RtApiAsio :: getDeviceCount( void )
-{
-  return (unsigned int) drivers.asioGetNumDev();
-}
-
-RtAudio::DeviceInfo RtApiAsio :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  // Get device ID
-  unsigned int nDevices = getDeviceCount();
-  if ( nDevices == 0 ) {
-    errorText_ = "RtApiAsio::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    errorText_ = "RtApiAsio::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  // If a stream is already open, we cannot probe other devices.  Thus, use the saved results.
-  if ( stream_.state != STREAM_CLOSED ) {
-    if ( device >= devices_.size() ) {
-      errorText_ = "RtApiAsio::getDeviceInfo: device ID was not present before stream was opened.";
-      error( RtAudioError::WARNING );
-      return info;
-    }
-    return devices_[ device ];
-  }
-
-  char driverName[32];
-  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::getDeviceInfo: unable to get driver name (" << getAsioErrorString( result ) << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  info.name = driverName;
-
-  if ( !drivers.loadDriver( driverName ) ) {
-    errorStream_ << "RtApiAsio::getDeviceInfo: unable to load driver (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  result = ASIOInit( &driverInfo );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Determine the device channel information.
-  long inputChannels, outputChannels;
-  result = ASIOGetChannels( &inputChannels, &outputChannels );
-  if ( result != ASE_OK ) {
-    drivers.removeCurrentDriver();
-    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  info.outputChannels = outputChannels;
-  info.inputChannels = inputChannels;
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  // Determine the supported sample rates.
-  info.sampleRates.clear();
-  for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
-    result = ASIOCanSampleRate( (ASIOSampleRate) SAMPLE_RATES[i] );
-    if ( result == ASE_OK ) {
-      info.sampleRates.push_back( SAMPLE_RATES[i] );
-
-      if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
-        info.preferredSampleRate = SAMPLE_RATES[i];
-    }
-  }
-
-  // Determine supported data types ... just check first channel and assume rest are the same.
-  ASIOChannelInfo channelInfo;
-  channelInfo.channel = 0;
-  channelInfo.isInput = true;
-  if ( info.inputChannels <= 0 ) channelInfo.isInput = false;
-  result = ASIOGetChannelInfo( &channelInfo );
-  if ( result != ASE_OK ) {
-    drivers.removeCurrentDriver();
-    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting driver channel info (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  info.nativeFormats = 0;
-  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB )
-    info.nativeFormats |= RTAUDIO_SINT16;
-  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB )
-    info.nativeFormats |= RTAUDIO_SINT32;
-  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB )
-    info.nativeFormats |= RTAUDIO_FLOAT32;
-  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB )
-    info.nativeFormats |= RTAUDIO_FLOAT64;
-  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB )
-    info.nativeFormats |= RTAUDIO_SINT24;
-
-  if ( info.outputChannels > 0 )
-    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
-  if ( info.inputChannels > 0 )
-    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
-
-  info.probed = true;
-  drivers.removeCurrentDriver();
-  return info;
-}
-
-static void bufferSwitch( long index, ASIOBool /*processNow*/ )
-{
-  RtApiAsio *object = (RtApiAsio *) asioCallbackInfo->object;
-  object->callbackEvent( index );
-}
-
-void RtApiAsio :: saveDeviceInfo( void )
-{
-  devices_.clear();
-
-  unsigned int nDevices = getDeviceCount();
-  devices_.resize( nDevices );
-  for ( unsigned int i=0; i<nDevices; i++ )
-    devices_[i] = getDeviceInfo( i );
-}
-
-bool RtApiAsio :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int *bufferSize,
-                                   RtAudio::StreamOptions *options )
-{////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-  bool isDuplexInput =  mode == INPUT && stream_.mode == OUTPUT;
-
-  // For ASIO, a duplex stream MUST use the same driver.
-  if ( isDuplexInput && stream_.device[0] != device ) {
-    errorText_ = "RtApiAsio::probeDeviceOpen: an ASIO duplex stream must use the same device for input and output!";
-    return FAILURE;
-  }
-
-  char driverName[32];
-  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: unable to get driver name (" << getAsioErrorString( result ) << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Only load the driver once for duplex stream.
-  if ( !isDuplexInput ) {
-    // The getDeviceInfo() function will not work when a stream is open
-    // because ASIO does not allow multiple devices to run at the same
-    // time.  Thus, we'll probe the system before opening a stream and
-    // save the results for use by getDeviceInfo().
-    this->saveDeviceInfo();
-
-    if ( !drivers.loadDriver( driverName ) ) {
-      errorStream_ << "RtApiAsio::probeDeviceOpen: unable to load driver (" << driverName << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    result = ASIOInit( &driverInfo );
-    if ( result != ASE_OK ) {
-      errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // keep them before any "goto error", they are used for error cleanup + goto device boundary checks
-  bool buffersAllocated = false;
-  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-  unsigned int nChannels;
-
-
-  // Check the device channel count.
-  long inputChannels, outputChannels;
-  result = ASIOGetChannels( &inputChannels, &outputChannels );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  if ( ( mode == OUTPUT && (channels+firstChannel) > (unsigned int) outputChannels) ||
-       ( mode == INPUT && (channels+firstChannel) > (unsigned int) inputChannels) ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested channel count (" << channels << ") + offset (" << firstChannel << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-  stream_.nDeviceChannels[mode] = channels;
-  stream_.nUserChannels[mode] = channels;
-  stream_.channelOffset[mode] = firstChannel;
-
-  // Verify the sample rate is supported.
-  result = ASIOCanSampleRate( (ASIOSampleRate) sampleRate );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested sample rate (" << sampleRate << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Get the current sample rate
-  ASIOSampleRate currentRate;
-  result = ASIOGetSampleRate( &currentRate );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error getting sample rate.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Set the sample rate only if necessary
-  if ( currentRate != sampleRate ) {
-    result = ASIOSetSampleRate( (ASIOSampleRate) sampleRate );
-    if ( result != ASE_OK ) {
-      errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error setting sample rate (" << sampleRate << ").";
-      errorText_ = errorStream_.str();
-      goto error;
-    }
-  }
-
-  // Determine the driver data type.
-  ASIOChannelInfo channelInfo;
-  channelInfo.channel = 0;
-  if ( mode == OUTPUT ) channelInfo.isInput = false;
-  else channelInfo.isInput = true;
-  result = ASIOGetChannelInfo( &channelInfo );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting data format.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Assuming WINDOWS host is always little-endian.
-  stream_.doByteSwap[mode] = false;
-  stream_.userFormat = format;
-  stream_.deviceFormat[mode] = 0;
-  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-    if ( channelInfo.type == ASIOSTInt16MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT32;
-    if ( channelInfo.type == ASIOSTInt32MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-    if ( channelInfo.type == ASIOSTFloat32MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
-    if ( channelInfo.type == ASIOSTFloat64MSB ) stream_.doByteSwap[mode] = true;
-  }
-  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT24;
-    if ( channelInfo.type == ASIOSTInt24MSB ) stream_.doByteSwap[mode] = true;
-  }
-
-  if ( stream_.deviceFormat[mode] == 0 ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") data format not supported by RtAudio.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  // Set the buffer size.  For a duplex stream, this will end up
-  // setting the buffer size based on the input constraints, which
-  // should be ok.
-  long minSize, maxSize, preferSize, granularity;
-  result = ASIOGetBufferSize( &minSize, &maxSize, &preferSize, &granularity );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting buffer size.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  if ( isDuplexInput ) {
-    // When this is the duplex input (output was opened before), then we have to use the same
-    // buffersize as the output, because it might use the preferred buffer size, which most
-    // likely wasn't passed as input to this. The buffer sizes have to be identically anyway,
-    // So instead of throwing an error, make them equal. The caller uses the reference
-    // to the "bufferSize" param as usual to set up processing buffers.
-
-    *bufferSize = stream_.bufferSize;
-
-  } else {
-    if ( *bufferSize == 0 ) *bufferSize = preferSize;
-    else if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
-    else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
-    else if ( granularity == -1 ) {
-      // Make sure bufferSize is a power of two.
-      int log2_of_min_size = 0;
-      int log2_of_max_size = 0;
-
-      for ( unsigned int i = 0; i < sizeof(long) * 8; i++ ) {
-        if ( minSize & ((long)1 << i) ) log2_of_min_size = i;
-        if ( maxSize & ((long)1 << i) ) log2_of_max_size = i;
-      }
-
-      long min_delta = std::abs( (long)*bufferSize - ((long)1 << log2_of_min_size) );
-      int min_delta_num = log2_of_min_size;
-
-      for (int i = log2_of_min_size + 1; i <= log2_of_max_size; i++) {
-        long current_delta = std::abs( (long)*bufferSize - ((long)1 << i) );
-        if (current_delta < min_delta) {
-          min_delta = current_delta;
-          min_delta_num = i;
-        }
-      }
-
-      *bufferSize = ( (unsigned int)1 << min_delta_num );
-      if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
-      else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
-    }
-    else if ( granularity != 0 ) {
-      // Set to an even multiple of granularity, rounding up.
-      *bufferSize = (*bufferSize + granularity-1) / granularity * granularity;
-    }
-  }
-
-  /*
-  // we don't use it anymore, see above!
-  // Just left it here for the case...
-  if ( isDuplexInput && stream_.bufferSize != *bufferSize ) {
-    errorText_ = "RtApiAsio::probeDeviceOpen: input/output buffersize discrepancy!";
-    goto error;
-  }
-  */
-
-  stream_.bufferSize = *bufferSize;
-  stream_.nBuffers = 2;
-
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-
-  // ASIO always uses non-interleaved buffers.
-  stream_.deviceInterleaved[mode] = false;
-
-  // Allocate, if necessary, our AsioHandle structure for the stream.
-  if ( handle == 0 ) {
-    try {
-      handle = new AsioHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiAsio::probeDeviceOpen: error allocating AsioHandle memory.";
-      goto error;
-    }
-    handle->bufferInfos = 0;
-
-    // Create a manual-reset event.
-    handle->condition = CreateEvent( NULL,   // no security
-                                     TRUE,   // manual-reset
-                                     FALSE,  // non-signaled initially
-                                     NULL ); // unnamed
-    stream_.apiHandle = (void *) handle;
-  }
-
-  // Create the ASIO internal buffers.  Since RtAudio sets up input
-  // and output separately, we'll have to dispose of previously
-  // created output buffers for a duplex stream.
-  if ( mode == INPUT && stream_.mode == OUTPUT ) {
-    ASIODisposeBuffers();
-    if ( handle->bufferInfos ) free( handle->bufferInfos );
-  }
-
-  // Allocate, initialize, and save the bufferInfos in our stream callbackInfo structure.
-  unsigned int i;
-  nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
-  handle->bufferInfos = (ASIOBufferInfo *) malloc( nChannels * sizeof(ASIOBufferInfo) );
-  if ( handle->bufferInfos == NULL ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: error allocating bufferInfo memory for driver (" << driverName << ").";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-
-  ASIOBufferInfo *infos;
-  infos = handle->bufferInfos;
-  for ( i=0; i<stream_.nDeviceChannels[0]; i++, infos++ ) {
-    infos->isInput = ASIOFalse;
-    infos->channelNum = i + stream_.channelOffset[0];
-    infos->buffers[0] = infos->buffers[1] = 0;
-  }
-  for ( i=0; i<stream_.nDeviceChannels[1]; i++, infos++ ) {
-    infos->isInput = ASIOTrue;
-    infos->channelNum = i + stream_.channelOffset[1];
-    infos->buffers[0] = infos->buffers[1] = 0;
-  }
-
-  // prepare for callbacks
-  stream_.sampleRate = sampleRate;
-  stream_.device[mode] = device;
-  stream_.mode = isDuplexInput ? DUPLEX : mode;
-
-  // store this class instance before registering callbacks, that are going to use it
-  asioCallbackInfo = &stream_.callbackInfo;
-  stream_.callbackInfo.object = (void *) this;
-
-  // Set up the ASIO callback structure and create the ASIO data buffers.
-  asioCallbacks.bufferSwitch = &bufferSwitch;
-  asioCallbacks.sampleRateDidChange = &sampleRateChanged;
-  asioCallbacks.asioMessage = &asioMessages;
-  asioCallbacks.bufferSwitchTimeInfo = NULL;
-  result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
-  if ( result != ASE_OK ) {
-    // Standard method failed. This can happen with strict/misbehaving drivers that return valid buffer size ranges
-    // but only accept the preferred buffer size as parameter for ASIOCreateBuffers. eg. Creatives ASIO driver
-    // in that case, let's be naïve and try that instead
-    *bufferSize = preferSize;
-    stream_.bufferSize = *bufferSize;
-    result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
-  }
-
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") creating buffers.";
-    errorText_ = errorStream_.str();
-    goto error;
-  }
-  buffersAllocated = true;  
-  stream_.state = STREAM_STOPPED;
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate necessary internal buffers
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiAsio::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( isDuplexInput && stream_.deviceBuffer ) {
-      unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-      if ( bufferBytes <= bytesOut ) makeBuffer = false;
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiAsio::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  // Determine device latencies
-  long inputLatency, outputLatency;
-  result = ASIOGetLatencies( &inputLatency, &outputLatency );
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting latency.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING); // warn but don't fail
-  }
-  else {
-    stream_.latency[0] = outputLatency;
-    stream_.latency[1] = inputLatency;
-  }
-
-  // Setup the buffer conversion information structure.  We don't use
-  // buffers to do channel offsets, so we override that parameter
-  // here.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
-
-  return SUCCESS;
-
- error:
-  if ( !isDuplexInput ) {
-    // the cleanup for error in the duplex input, is done by RtApi::openStream
-    // So we clean up for single channel only
-
-    if ( buffersAllocated )
-      ASIODisposeBuffers();
-
-    drivers.removeCurrentDriver();
-
-    if ( handle ) {
-      CloseHandle( handle->condition );
-      if ( handle->bufferInfos )
-        free( handle->bufferInfos );
-
-      delete handle;
-      stream_.apiHandle = 0;
-    }
-
-
-    if ( stream_.userBuffer[mode] ) {
-      free( stream_.userBuffer[mode] );
-      stream_.userBuffer[mode] = 0;
-    }
-
-    if ( stream_.deviceBuffer ) {
-      free( stream_.deviceBuffer );
-      stream_.deviceBuffer = 0;
-    }
-  }
-
-  return FAILURE;
-}////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void RtApiAsio :: closeStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiAsio::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  if ( stream_.state == STREAM_RUNNING ) {
-    stream_.state = STREAM_STOPPED;
-    ASIOStop();
-  }
-  ASIODisposeBuffers();
-  drivers.removeCurrentDriver();
-
-  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-  if ( handle ) {
-    CloseHandle( handle->condition );
-    if ( handle->bufferInfos )
-      free( handle->bufferInfos );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-bool stopThreadCalled = false;
-
-void RtApiAsio :: startStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiAsio::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-  ASIOError result = ASIOStart();
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::startStream: error (" << getAsioErrorString( result ) << ") starting device.";
-    errorText_ = errorStream_.str();
-    goto unlock;
-  }
-
-  handle->drainCounter = 0;
-  handle->internalDrain = false;
-  ResetEvent( handle->condition );
-  stream_.state = STREAM_RUNNING;
-  asioXRun = false;
-
- unlock:
-  stopThreadCalled = false;
-
-  if ( result == ASE_OK ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAsio :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiAsio::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    if ( handle->drainCounter == 0 ) {
-      handle->drainCounter = 2;
-      WaitForSingleObject( handle->condition, INFINITE );  // block until signaled
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-
-  ASIOError result = ASIOStop();
-  if ( result != ASE_OK ) {
-    errorStream_ << "RtApiAsio::stopStream: error (" << getAsioErrorString( result ) << ") stopping device.";
-    errorText_ = errorStream_.str();
-  }
-
-  if ( result == ASE_OK ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAsio :: abortStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiAsio::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // The following lines were commented-out because some behavior was
-  // noted where the device buffers need to be zeroed to avoid
-  // continuing sound, even when the device buffers are completely
-  // disposed.  So now, calling abort is the same as calling stop.
-  // AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-  // handle->drainCounter = 2;
-  stopStream();
-}
-
-// This function will be called by a spawned thread when the user
-// callback function signals that the stream should be stopped or
-// aborted.  It is necessary to handle it this way because the
-// callbackEvent() function must return before the ASIOStop()
-// function will return.
-static unsigned __stdcall asioStopStream( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiAsio *object = (RtApiAsio *) info->object;
-
-  object->stopStream();
-  _endthreadex( 0 );
-  return 0;
-}
-
-bool RtApiAsio :: callbackEvent( long bufferIndex )
-{
-  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiAsio::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return FAILURE;
-  }
-
-  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
-  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-
-  // Check if we were draining the stream and signal if finished.
-  if ( handle->drainCounter > 3 ) {
-
-    stream_.state = STREAM_STOPPING;
-    if ( handle->internalDrain == false )
-      SetEvent( handle->condition );
-    else { // spawn a thread to stop the stream
-      unsigned threadId;
-      stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
-                                                    &stream_.callbackInfo, 0, &threadId );
-    }
-    return SUCCESS;
-  }
-
-  // Invoke user callback to get fresh output data UNLESS we are
-  // draining stream.
-  if ( handle->drainCounter == 0 ) {
-    RtAudioCallback callback = (RtAudioCallback) info->callback;
-    double streamTime = getStreamTime();
-    RtAudioStreamStatus status = 0;
-    if ( stream_.mode != INPUT && asioXRun == true ) {
-      status |= RTAUDIO_OUTPUT_UNDERFLOW;
-      asioXRun = false;
-    }
-    if ( stream_.mode != OUTPUT && asioXRun == true ) {
-      status |= RTAUDIO_INPUT_OVERFLOW;
-      asioXRun = false;
-    }
-    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                                     stream_.bufferSize, streamTime, status, info->userData );
-    if ( cbReturnValue == 2 ) {
-      stream_.state = STREAM_STOPPING;
-      handle->drainCounter = 2;
-      unsigned threadId;
-      stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
-                                                    &stream_.callbackInfo, 0, &threadId );
-      return SUCCESS;
-    }
-    else if ( cbReturnValue == 1 ) {
-      handle->drainCounter = 1;
-      handle->internalDrain = true;
-    }
-  }
-
-  unsigned int nChannels, bufferBytes, i, j;
-  nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    bufferBytes = stream_.bufferSize * formatBytes( stream_.deviceFormat[0] );
-
-    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
-      for ( i=0, j=0; i<nChannels; i++ ) {
-        if ( handle->bufferInfos[i].isInput != ASIOTrue )
-          memset( handle->bufferInfos[i].buffers[bufferIndex], 0, bufferBytes );
-      }
-
-    }
-    else if ( stream_.doConvertBuffer[0] ) {
-
-      convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-      if ( stream_.doByteSwap[0] )
-        byteSwapBuffer( stream_.deviceBuffer,
-                        stream_.bufferSize * stream_.nDeviceChannels[0],
-                        stream_.deviceFormat[0] );
-
-      for ( i=0, j=0; i<nChannels; i++ ) {
-        if ( handle->bufferInfos[i].isInput != ASIOTrue )
-          memcpy( handle->bufferInfos[i].buffers[bufferIndex],
-                  &stream_.deviceBuffer[j++*bufferBytes], bufferBytes );
-      }
-
-    }
-    else {
-
-      if ( stream_.doByteSwap[0] )
-        byteSwapBuffer( stream_.userBuffer[0],
-                        stream_.bufferSize * stream_.nUserChannels[0],
-                        stream_.userFormat );
-
-      for ( i=0, j=0; i<nChannels; i++ ) {
-        if ( handle->bufferInfos[i].isInput != ASIOTrue )
-          memcpy( handle->bufferInfos[i].buffers[bufferIndex],
-                  &stream_.userBuffer[0][bufferBytes*j++], bufferBytes );
-      }
-
-    }
-  }
-
-  // Don't bother draining input
-  if ( handle->drainCounter ) {
-    handle->drainCounter++;
-    goto unlock;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    bufferBytes = stream_.bufferSize * formatBytes(stream_.deviceFormat[1]);
-
-    if (stream_.doConvertBuffer[1]) {
-
-      // Always interleave ASIO input data.
-      for ( i=0, j=0; i<nChannels; i++ ) {
-        if ( handle->bufferInfos[i].isInput == ASIOTrue )
-          memcpy( &stream_.deviceBuffer[j++*bufferBytes],
-                  handle->bufferInfos[i].buffers[bufferIndex],
-                  bufferBytes );
-      }
-
-      if ( stream_.doByteSwap[1] )
-        byteSwapBuffer( stream_.deviceBuffer,
-                        stream_.bufferSize * stream_.nDeviceChannels[1],
-                        stream_.deviceFormat[1] );
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-
-    }
-    else {
-      for ( i=0, j=0; i<nChannels; i++ ) {
-        if ( handle->bufferInfos[i].isInput == ASIOTrue ) {
-          memcpy( &stream_.userBuffer[1][bufferBytes*j++],
-                  handle->bufferInfos[i].buffers[bufferIndex],
-                  bufferBytes );
-        }
-      }
-
-      if ( stream_.doByteSwap[1] )
-        byteSwapBuffer( stream_.userBuffer[1],
-                        stream_.bufferSize * stream_.nUserChannels[1],
-                        stream_.userFormat );
-    }
-  }
-
- unlock:
-  // The following call was suggested by Malte Clasen.  While the API
-  // documentation indicates it should not be required, some device
-  // drivers apparently do not function correctly without it.
-  ASIOOutputReady();
-
-  RtApi::tickStreamTime();
-  return SUCCESS;
-}
-
-static void sampleRateChanged( ASIOSampleRate sRate )
-{
-  // The ASIO documentation says that this usually only happens during
-  // external sync.  Audio processing is not stopped by the driver,
-  // actual sample rate might not have even changed, maybe only the
-  // sample rate status of an AES/EBU or S/PDIF digital input at the
-  // audio device.
-
-  RtApi *object = (RtApi *) asioCallbackInfo->object;
-  try {
-    object->stopStream();
-  }
-  catch ( RtAudioError &exception ) {
-    std::cerr << "\nRtApiAsio: sampleRateChanged() error (" << exception.getMessage() << ")!\n" << std::endl;
-    return;
-  }
-
-  std::cerr << "\nRtApiAsio: driver reports sample rate changed to " << sRate << " ... stream stopped!!!\n" << std::endl;
-}
-
-static long asioMessages( long selector, long value, void* /*message*/, double* /*opt*/ )
-{
-  long ret = 0;
-
-  switch( selector ) {
-  case kAsioSelectorSupported:
-    if ( value == kAsioResetRequest
-         || value == kAsioEngineVersion
-         || value == kAsioResyncRequest
-         || value == kAsioLatenciesChanged
-         // The following three were added for ASIO 2.0, you don't
-         // necessarily have to support them.
-         || value == kAsioSupportsTimeInfo
-         || value == kAsioSupportsTimeCode
-         || value == kAsioSupportsInputMonitor)
-      ret = 1L;
-    break;
-  case kAsioResetRequest:
-    // Defer the task and perform the reset of the driver during the
-    // next "safe" situation.  You cannot reset the driver right now,
-    // as this code is called from the driver.  Reset the driver is
-    // done by completely destruct is. I.e. ASIOStop(),
-    // ASIODisposeBuffers(), Destruction Afterwards you initialize the
-    // driver again.
-    std::cerr << "\nRtApiAsio: driver reset requested!!!" << std::endl;
-    ret = 1L;
-    break;
-  case kAsioResyncRequest:
-    // This informs the application that the driver encountered some
-    // non-fatal data loss.  It is used for synchronization purposes
-    // of different media.  Added mainly to work around the Win16Mutex
-    // problems in Windows 95/98 with the Windows Multimedia system,
-    // which could lose data because the Mutex was held too long by
-    // another thread.  However a driver can issue it in other
-    // situations, too.
-    // std::cerr << "\nRtApiAsio: driver resync requested!!!" << std::endl;
-    asioXRun = true;
-    ret = 1L;
-    break;
-  case kAsioLatenciesChanged:
-    // This will inform the host application that the drivers were
-    // latencies changed.  Beware, it this does not mean that the
-    // buffer sizes have changed!  You might need to update internal
-    // delay data.
-    std::cerr << "\nRtApiAsio: driver latency may have changed!!!" << std::endl;
-    ret = 1L;
-    break;
-  case kAsioEngineVersion:
-    // Return the supported ASIO version of the host application.  If
-    // a host application does not implement this selector, ASIO 1.0
-    // is assumed by the driver.
-    ret = 2L;
-    break;
-  case kAsioSupportsTimeInfo:
-    // Informs the driver whether the
-    // asioCallbacks.bufferSwitchTimeInfo() callback is supported.
-    // For compatibility with ASIO 1.0 drivers the host application
-    // should always support the "old" bufferSwitch method, too.
-    ret = 0;
-    break;
-  case kAsioSupportsTimeCode:
-    // Informs the driver whether application is interested in time
-    // code info.  If an application does not need to know about time
-    // code, the driver has less work to do.
-    ret = 0;
-    break;
-  }
-  return ret;
-}
-
-static const char* getAsioErrorString( ASIOError result )
-{
-  struct Messages 
-  {
-    ASIOError value;
-    const char*message;
-  };
-
-  static const Messages m[] = 
-    {
-      {   ASE_NotPresent,    "Hardware input or output is not present or available." },
-      {   ASE_HWMalfunction,  "Hardware is malfunctioning." },
-      {   ASE_InvalidParameter, "Invalid input parameter." },
-      {   ASE_InvalidMode,      "Invalid mode." },
-      {   ASE_SPNotAdvancing,     "Sample position not advancing." },
-      {   ASE_NoClock,            "Sample clock or rate cannot be determined or is not present." },
-      {   ASE_NoMemory,           "Not enough memory to complete the request." }
-    };
-
-  for ( unsigned int i = 0; i < sizeof(m)/sizeof(m[0]); ++i )
-    if ( m[i].value == result ) return m[i].message;
-
-  return "Unknown error.";
-}
-
-//******************** End of __WINDOWS_ASIO__ *********************//
-#endif
-
-
-#if defined(__WINDOWS_WASAPI__) // Windows WASAPI API
-
-// Authored by Marcus Tomlinson <themarcustomlinson@gmail.com>, April 2014
-// - Introduces support for the Windows WASAPI API
-// - Aims to deliver bit streams to and from hardware at the lowest possible latency, via the absolute minimum buffer sizes required
-// - Provides flexible stream configuration to an otherwise strict and inflexible WASAPI interface
-// - Includes automatic internal conversion of sample rate and buffer size between hardware and the user
-
-#ifndef INITGUID
-  #define INITGUID
-#endif
-#include <audioclient.h>
-#include <avrt.h>
-#include <mmdeviceapi.h>
-#include <functiondiscoverykeys_devpkey.h>
-
-//=============================================================================
-
-#define SAFE_RELEASE( objectPtr )\
-if ( objectPtr )\
-{\
-  objectPtr->Release();\
-  objectPtr = NULL;\
-}
-
-typedef HANDLE ( __stdcall *TAvSetMmThreadCharacteristicsPtr )( LPCWSTR TaskName, LPDWORD TaskIndex );
-
-//-----------------------------------------------------------------------------
-
-// WASAPI dictates stream sample rate, format, channel count, and in some cases, buffer size.
-// Therefore we must perform all necessary conversions to user buffers in order to satisfy these
-// requirements. WasapiBuffer ring buffers are used between HwIn->UserIn and UserOut->HwOut to
-// provide intermediate storage for read / write synchronization.
-class WasapiBuffer
-{
-public:
-  WasapiBuffer()
-    : buffer_( NULL ),
-      bufferSize_( 0 ),
-      inIndex_( 0 ),
-      outIndex_( 0 ) {}
-
-  ~WasapiBuffer() {
-    free( buffer_ );
-  }
-
-  // sets the length of the internal ring buffer
-  void setBufferSize( unsigned int bufferSize, unsigned int formatBytes ) {
-    free( buffer_ );
-
-    buffer_ = ( char* ) calloc( bufferSize, formatBytes );
-
-    bufferSize_ = bufferSize;
-    inIndex_ = 0;
-    outIndex_ = 0;
-  }
-
-  // attempt to push a buffer into the ring buffer at the current "in" index
-  bool pushBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
-  {
-    if ( !buffer ||                 // incoming buffer is NULL
-         bufferSize == 0 ||         // incoming buffer has no data
-         bufferSize > bufferSize_ ) // incoming buffer too large
-    {
-      return false;
-    }
-
-    unsigned int relOutIndex = outIndex_;
-    unsigned int inIndexEnd = inIndex_ + bufferSize;
-    if ( relOutIndex < inIndex_ && inIndexEnd >= bufferSize_ ) {
-      relOutIndex += bufferSize_;
-    }
-
-    // "in" index can end on the "out" index but cannot begin at it
-    if ( inIndex_ <= relOutIndex && inIndexEnd > relOutIndex ) {
-      return false; // not enough space between "in" index and "out" index
-    }
-
-    // copy buffer from external to internal
-    int fromZeroSize = inIndex_ + bufferSize - bufferSize_;
-    fromZeroSize = fromZeroSize < 0 ? 0 : fromZeroSize;
-    int fromInSize = bufferSize - fromZeroSize;
-
-    switch( format )
-      {
-      case RTAUDIO_SINT8:
-        memcpy( &( ( char* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( char ) );
-        memcpy( buffer_, &( ( char* ) buffer )[fromInSize], fromZeroSize * sizeof( char ) );
-        break;
-      case RTAUDIO_SINT16:
-        memcpy( &( ( short* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( short ) );
-        memcpy( buffer_, &( ( short* ) buffer )[fromInSize], fromZeroSize * sizeof( short ) );
-        break;
-      case RTAUDIO_SINT24:
-        memcpy( &( ( S24* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( S24 ) );
-        memcpy( buffer_, &( ( S24* ) buffer )[fromInSize], fromZeroSize * sizeof( S24 ) );
-        break;
-      case RTAUDIO_SINT32:
-        memcpy( &( ( int* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( int ) );
-        memcpy( buffer_, &( ( int* ) buffer )[fromInSize], fromZeroSize * sizeof( int ) );
-        break;
-      case RTAUDIO_FLOAT32:
-        memcpy( &( ( float* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( float ) );
-        memcpy( buffer_, &( ( float* ) buffer )[fromInSize], fromZeroSize * sizeof( float ) );
-        break;
-      case RTAUDIO_FLOAT64:
-        memcpy( &( ( double* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( double ) );
-        memcpy( buffer_, &( ( double* ) buffer )[fromInSize], fromZeroSize * sizeof( double ) );
-        break;
-    }
-
-    // update "in" index
-    inIndex_ += bufferSize;
-    inIndex_ %= bufferSize_;
-
-    return true;
-  }
-
-  // attempt to pull a buffer from the ring buffer from the current "out" index
-  bool pullBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
-  {
-    if ( !buffer ||                 // incoming buffer is NULL
-         bufferSize == 0 ||         // incoming buffer has no data
-         bufferSize > bufferSize_ ) // incoming buffer too large
-    {
-      return false;
-    }
-
-    unsigned int relInIndex = inIndex_;
-    unsigned int outIndexEnd = outIndex_ + bufferSize;
-    if ( relInIndex < outIndex_ && outIndexEnd >= bufferSize_ ) {
-      relInIndex += bufferSize_;
-    }
-
-    // "out" index can begin at and end on the "in" index
-    if ( outIndex_ < relInIndex && outIndexEnd > relInIndex ) {
-      return false; // not enough space between "out" index and "in" index
-    }
-
-    // copy buffer from internal to external
-    int fromZeroSize = outIndex_ + bufferSize - bufferSize_;
-    fromZeroSize = fromZeroSize < 0 ? 0 : fromZeroSize;
-    int fromOutSize = bufferSize - fromZeroSize;
-
-    switch( format )
-    {
-      case RTAUDIO_SINT8:
-        memcpy( buffer, &( ( char* ) buffer_ )[outIndex_], fromOutSize * sizeof( char ) );
-        memcpy( &( ( char* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( char ) );
-        break;
-      case RTAUDIO_SINT16:
-        memcpy( buffer, &( ( short* ) buffer_ )[outIndex_], fromOutSize * sizeof( short ) );
-        memcpy( &( ( short* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( short ) );
-        break;
-      case RTAUDIO_SINT24:
-        memcpy( buffer, &( ( S24* ) buffer_ )[outIndex_], fromOutSize * sizeof( S24 ) );
-        memcpy( &( ( S24* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( S24 ) );
-        break;
-      case RTAUDIO_SINT32:
-        memcpy( buffer, &( ( int* ) buffer_ )[outIndex_], fromOutSize * sizeof( int ) );
-        memcpy( &( ( int* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( int ) );
-        break;
-      case RTAUDIO_FLOAT32:
-        memcpy( buffer, &( ( float* ) buffer_ )[outIndex_], fromOutSize * sizeof( float ) );
-        memcpy( &( ( float* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( float ) );
-        break;
-      case RTAUDIO_FLOAT64:
-        memcpy( buffer, &( ( double* ) buffer_ )[outIndex_], fromOutSize * sizeof( double ) );
-        memcpy( &( ( double* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( double ) );
-        break;
-    }
-
-    // update "out" index
-    outIndex_ += bufferSize;
-    outIndex_ %= bufferSize_;
-
-    return true;
-  }
-
-private:
-  char* buffer_;
-  unsigned int bufferSize_;
-  unsigned int inIndex_;
-  unsigned int outIndex_;
-};
-
-//-----------------------------------------------------------------------------
-
-// In order to satisfy WASAPI's buffer requirements, we need a means of converting sample rate
-// between HW and the user. The convertBufferWasapi function is used to perform this conversion
-// between HwIn->UserIn and UserOut->HwOut during the stream callback loop.
-// This sample rate converter favors speed over quality, and works best with conversions between
-// one rate and its multiple.
-void convertBufferWasapi( char* outBuffer,
-                          const char* inBuffer,
-                          const unsigned int& channelCount,
-                          const unsigned int& inSampleRate,
-                          const unsigned int& outSampleRate,
-                          const unsigned int& inSampleCount,
-                          unsigned int& outSampleCount,
-                          const RtAudioFormat& format )
-{
-  // calculate the new outSampleCount and relative sampleStep
-  float sampleRatio = ( float ) outSampleRate / inSampleRate;
-  float sampleStep = 1.0f / sampleRatio;
-  float inSampleFraction = 0.0f;
-
-  outSampleCount = ( unsigned int ) roundf( inSampleCount * sampleRatio );
-
-  // frame-by-frame, copy each relative input sample into it's corresponding output sample
-  for ( unsigned int outSample = 0; outSample < outSampleCount; outSample++ )
-  {
-    unsigned int inSample = ( unsigned int ) inSampleFraction;
-
-    switch ( format )
-    {
-      case RTAUDIO_SINT8:
-        memcpy( &( ( char* ) outBuffer )[ outSample * channelCount ], &( ( char* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( char ) );
-        break;
-      case RTAUDIO_SINT16:
-        memcpy( &( ( short* ) outBuffer )[ outSample * channelCount ], &( ( short* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( short ) );
-        break;
-      case RTAUDIO_SINT24:
-        memcpy( &( ( S24* ) outBuffer )[ outSample * channelCount ], &( ( S24* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( S24 ) );
-        break;
-      case RTAUDIO_SINT32:
-        memcpy( &( ( int* ) outBuffer )[ outSample * channelCount ], &( ( int* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( int ) );
-        break;
-      case RTAUDIO_FLOAT32:
-        memcpy( &( ( float* ) outBuffer )[ outSample * channelCount ], &( ( float* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( float ) );
-        break;
-      case RTAUDIO_FLOAT64:
-        memcpy( &( ( double* ) outBuffer )[ outSample * channelCount ], &( ( double* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( double ) );
-        break;
-    }
-
-    // jump to next in sample
-    inSampleFraction += sampleStep;
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-// A structure to hold various information related to the WASAPI implementation.
-struct WasapiHandle
-{
-  IAudioClient* captureAudioClient;
-  IAudioClient* renderAudioClient;
-  IAudioCaptureClient* captureClient;
-  IAudioRenderClient* renderClient;
-  HANDLE captureEvent;
-  HANDLE renderEvent;
-
-  WasapiHandle()
-  : captureAudioClient( NULL ),
-    renderAudioClient( NULL ),
-    captureClient( NULL ),
-    renderClient( NULL ),
-    captureEvent( NULL ),
-    renderEvent( NULL ) {}
-};
-
-//=============================================================================
-
-RtApiWasapi::RtApiWasapi()
-  : coInitialized_( false ), deviceEnumerator_( NULL )
-{
-  // WASAPI can run either apartment or multi-threaded
-  HRESULT hr = CoInitialize( NULL );
-  if ( !FAILED( hr ) )
-    coInitialized_ = true;
-
-  // Instantiate device enumerator
-  hr = CoCreateInstance( __uuidof( MMDeviceEnumerator ), NULL,
-                         CLSCTX_ALL, __uuidof( IMMDeviceEnumerator ),
-                         ( void** ) &deviceEnumerator_ );
-
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::RtApiWasapi: Unable to instantiate device enumerator";
-    error( RtAudioError::DRIVER_ERROR );
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-RtApiWasapi::~RtApiWasapi()
-{
-  if ( stream_.state != STREAM_CLOSED )
-    closeStream();
-
-  SAFE_RELEASE( deviceEnumerator_ );
-
-  // If this object previously called CoInitialize()
-  if ( coInitialized_ )
-    CoUninitialize();
-}
-
-//=============================================================================
-
-unsigned int RtApiWasapi::getDeviceCount( void )
-{
-  unsigned int captureDeviceCount = 0;
-  unsigned int renderDeviceCount = 0;
-
-  IMMDeviceCollection* captureDevices = NULL;
-  IMMDeviceCollection* renderDevices = NULL;
-
-  // Count capture devices
-  errorText_.clear();
-  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device collection.";
-    goto Exit;
-  }
-
-  hr = captureDevices->GetCount( &captureDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device count.";
-    goto Exit;
-  }
-
-  // Count render devices
-  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device collection.";
-    goto Exit;
-  }
-
-  hr = renderDevices->GetCount( &renderDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device count.";
-    goto Exit;
-  }
-
-Exit:
-  // release all references
-  SAFE_RELEASE( captureDevices );
-  SAFE_RELEASE( renderDevices );
-
-  if ( errorText_.empty() )
-    return captureDeviceCount + renderDeviceCount;
-
-  error( RtAudioError::DRIVER_ERROR );
-  return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-RtAudio::DeviceInfo RtApiWasapi::getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  unsigned int captureDeviceCount = 0;
-  unsigned int renderDeviceCount = 0;
-  std::string defaultDeviceName;
-  bool isCaptureDevice = false;
-
-  PROPVARIANT deviceNameProp;
-  PROPVARIANT defaultDeviceNameProp;
-
-  IMMDeviceCollection* captureDevices = NULL;
-  IMMDeviceCollection* renderDevices = NULL;
-  IMMDevice* devicePtr = NULL;
-  IMMDevice* defaultDevicePtr = NULL;
-  IAudioClient* audioClient = NULL;
-  IPropertyStore* devicePropStore = NULL;
-  IPropertyStore* defaultDevicePropStore = NULL;
-
-  WAVEFORMATEX* deviceFormat = NULL;
-  WAVEFORMATEX* closestMatchFormat = NULL;
-
-  // probed
-  info.probed = false;
-
-  // Count capture devices
-  errorText_.clear();
-  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
-  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device collection.";
-    goto Exit;
-  }
-
-  hr = captureDevices->GetCount( &captureDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device count.";
-    goto Exit;
-  }
-
-  // Count render devices
-  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device collection.";
-    goto Exit;
-  }
-
-  hr = renderDevices->GetCount( &renderDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device count.";
-    goto Exit;
-  }
-
-  // validate device index
-  if ( device >= captureDeviceCount + renderDeviceCount ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Invalid device index.";
-    errorType = RtAudioError::INVALID_USE;
-    goto Exit;
-  }
-
-  // determine whether index falls within capture or render devices
-  if ( device >= renderDeviceCount ) {
-    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device handle.";
-      goto Exit;
-    }
-    isCaptureDevice = true;
-  }
-  else {
-    hr = renderDevices->Item( device, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device handle.";
-      goto Exit;
-    }
-    isCaptureDevice = false;
-  }
-
-  // get default device name
-  if ( isCaptureDevice ) {
-    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eCapture, eConsole, &defaultDevicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default capture device handle.";
-      goto Exit;
-    }
-  }
-  else {
-    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eRender, eConsole, &defaultDevicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default render device handle.";
-      goto Exit;
-    }
-  }
-
-  hr = defaultDevicePtr->OpenPropertyStore( STGM_READ, &defaultDevicePropStore );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open default device property store.";
-    goto Exit;
-  }
-  PropVariantInit( &defaultDeviceNameProp );
-
-  hr = defaultDevicePropStore->GetValue( PKEY_Device_FriendlyName, &defaultDeviceNameProp );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default device property: PKEY_Device_FriendlyName.";
-    goto Exit;
-  }
-
-  defaultDeviceName = convertCharPointerToStdString(defaultDeviceNameProp.pwszVal);
-
-  // name
-  hr = devicePtr->OpenPropertyStore( STGM_READ, &devicePropStore );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open device property store.";
-    goto Exit;
-  }
-
-  PropVariantInit( &deviceNameProp );
-
-  hr = devicePropStore->GetValue( PKEY_Device_FriendlyName, &deviceNameProp );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device property: PKEY_Device_FriendlyName.";
-    goto Exit;
-  }
-
-  info.name =convertCharPointerToStdString(deviceNameProp.pwszVal);
-
-  // is default
-  if ( isCaptureDevice ) {
-    info.isDefaultInput = info.name == defaultDeviceName;
-    info.isDefaultOutput = false;
-  }
-  else {
-    info.isDefaultInput = false;
-    info.isDefaultOutput = info.name == defaultDeviceName;
-  }
-
-  // channel count
-  hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, NULL, ( void** ) &audioClient );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device audio client.";
-    goto Exit;
-  }
-
-  hr = audioClient->GetMixFormat( &deviceFormat );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device mix format.";
-    goto Exit;
-  }
-
-  if ( isCaptureDevice ) {
-    info.inputChannels = deviceFormat->nChannels;
-    info.outputChannels = 0;
-    info.duplexChannels = 0;
-  }
-  else {
-    info.inputChannels = 0;
-    info.outputChannels = deviceFormat->nChannels;
-    info.duplexChannels = 0;
-  }
-
-  // sample rates
-  info.sampleRates.clear();
-
-  // allow support for all sample rates as we have a built-in sample rate converter
-  for ( unsigned int i = 0; i < MAX_SAMPLE_RATES; i++ ) {
-    info.sampleRates.push_back( SAMPLE_RATES[i] );
-  }
-  info.preferredSampleRate = deviceFormat->nSamplesPerSec;
-
-  // native format
-  info.nativeFormats = 0;
-
-  if ( deviceFormat->wFormatTag == WAVE_FORMAT_IEEE_FLOAT ||
-       ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
-         ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ) )
-  {
-    if ( deviceFormat->wBitsPerSample == 32 ) {
-      info.nativeFormats |= RTAUDIO_FLOAT32;
-    }
-    else if ( deviceFormat->wBitsPerSample == 64 ) {
-      info.nativeFormats |= RTAUDIO_FLOAT64;
-    }
-  }
-  else if ( deviceFormat->wFormatTag == WAVE_FORMAT_PCM ||
-           ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
-             ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_PCM ) )
-  {
-    if ( deviceFormat->wBitsPerSample == 8 ) {
-      info.nativeFormats |= RTAUDIO_SINT8;
-    }
-    else if ( deviceFormat->wBitsPerSample == 16 ) {
-      info.nativeFormats |= RTAUDIO_SINT16;
-    }
-    else if ( deviceFormat->wBitsPerSample == 24 ) {
-      info.nativeFormats |= RTAUDIO_SINT24;
-    }
-    else if ( deviceFormat->wBitsPerSample == 32 ) {
-      info.nativeFormats |= RTAUDIO_SINT32;
-    }
-  }
-
-  // probed
-  info.probed = true;
-
-Exit:
-  // release all references
-  PropVariantClear( &deviceNameProp );
-  PropVariantClear( &defaultDeviceNameProp );
-
-  SAFE_RELEASE( captureDevices );
-  SAFE_RELEASE( renderDevices );
-  SAFE_RELEASE( devicePtr );
-  SAFE_RELEASE( defaultDevicePtr );
-  SAFE_RELEASE( audioClient );
-  SAFE_RELEASE( devicePropStore );
-  SAFE_RELEASE( defaultDevicePropStore );
-
-  CoTaskMemFree( deviceFormat );
-  CoTaskMemFree( closestMatchFormat );
-
-  if ( !errorText_.empty() )
-    error( errorType );
-  return info;
-}
-
-//-----------------------------------------------------------------------------
-
-unsigned int RtApiWasapi::getDefaultOutputDevice( void )
-{
-  for ( unsigned int i = 0; i < getDeviceCount(); i++ ) {
-    if ( getDeviceInfo( i ).isDefaultOutput ) {
-      return i;
-    }
-  }
-
-  return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-unsigned int RtApiWasapi::getDefaultInputDevice( void )
-{
-  for ( unsigned int i = 0; i < getDeviceCount(); i++ ) {
-    if ( getDeviceInfo( i ).isDefaultInput ) {
-      return i;
-    }
-  }
-
-  return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::closeStream( void )
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiWasapi::closeStream: No open stream to close.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  if ( stream_.state != STREAM_STOPPED )
-    stopStream();
-
-  // clean up stream memory
-  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient )
-  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient )
-
-  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureClient )
-  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderClient )
-
-  if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent )
-    CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent );
-
-  if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent )
-    CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent );
-
-  delete ( WasapiHandle* ) stream_.apiHandle;
-  stream_.apiHandle = NULL;
-
-  for ( int i = 0; i < 2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  // update stream state
-  stream_.state = STREAM_CLOSED;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::startStream( void )
-{
-  verifyStream();
-
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiWasapi::startStream: The stream is already running.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // update stream state
-  stream_.state = STREAM_RUNNING;
-
-  // create WASAPI stream thread
-  stream_.callbackInfo.thread = ( ThreadHandle ) CreateThread( NULL, 0, runWasapiThread, this, CREATE_SUSPENDED, NULL );
-
-  if ( !stream_.callbackInfo.thread ) {
-    errorText_ = "RtApiWasapi::startStream: Unable to instantiate callback thread.";
-    error( RtAudioError::THREAD_ERROR );
-  }
-  else {
-    SetThreadPriority( ( void* ) stream_.callbackInfo.thread, stream_.callbackInfo.priority );
-    ResumeThread( ( void* ) stream_.callbackInfo.thread );
-  }
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::stopStream( void )
-{
-  verifyStream();
-
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiWasapi::stopStream: The stream is already stopped.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // inform stream thread by setting stream state to STREAM_STOPPING
-  stream_.state = STREAM_STOPPING;
-
-  // wait until stream thread is stopped
-  while( stream_.state != STREAM_STOPPED ) {
-    Sleep( 1 );
-  }
-
-  // Wait for the last buffer to play before stopping.
-  Sleep( 1000 * stream_.bufferSize / stream_.sampleRate );
-
-  // stop capture client if applicable
-  if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) {
-    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::stopStream: Unable to stop capture stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // stop render client if applicable
-  if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) {
-    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::stopStream: Unable to stop render stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // close thread handle
-  if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
-    errorText_ = "RtApiWasapi::stopStream: Unable to close callback thread.";
-    error( RtAudioError::THREAD_ERROR );
-    return;
-  }
-
-  stream_.callbackInfo.thread = (ThreadHandle) NULL;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::abortStream( void )
-{
-  verifyStream();
-
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiWasapi::abortStream: The stream is already stopped.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // inform stream thread by setting stream state to STREAM_STOPPING
-  stream_.state = STREAM_STOPPING;
-
-  // wait until stream thread is stopped
-  while ( stream_.state != STREAM_STOPPED ) {
-    Sleep( 1 );
-  }
-
-  // stop capture client if applicable
-  if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) {
-    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::abortStream: Unable to stop capture stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // stop render client if applicable
-  if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) {
-    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::abortStream: Unable to stop render stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // close thread handle
-  if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
-    errorText_ = "RtApiWasapi::abortStream: Unable to close callback thread.";
-    error( RtAudioError::THREAD_ERROR );
-    return;
-  }
-
-  stream_.callbackInfo.thread = (ThreadHandle) NULL;
-}
-
-//-----------------------------------------------------------------------------
-
-bool RtApiWasapi::probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int* bufferSize,
-                                   RtAudio::StreamOptions* options )
-{
-  bool methodResult = FAILURE;
-  unsigned int captureDeviceCount = 0;
-  unsigned int renderDeviceCount = 0;
-
-  IMMDeviceCollection* captureDevices = NULL;
-  IMMDeviceCollection* renderDevices = NULL;
-  IMMDevice* devicePtr = NULL;
-  WAVEFORMATEX* deviceFormat = NULL;
-  unsigned int bufferBytes;
-  stream_.state = STREAM_STOPPED;
-
-  // create API Handle if not already created
-  if ( !stream_.apiHandle )
-    stream_.apiHandle = ( void* ) new WasapiHandle();
-
-  // Count capture devices
-  errorText_.clear();
-  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
-  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device collection.";
-    goto Exit;
-  }
-
-  hr = captureDevices->GetCount( &captureDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device count.";
-    goto Exit;
-  }
-
-  // Count render devices
-  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device collection.";
-    goto Exit;
-  }
-
-  hr = renderDevices->GetCount( &renderDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device count.";
-    goto Exit;
-  }
-
-  // validate device index
-  if ( device >= captureDeviceCount + renderDeviceCount ) {
-    errorType = RtAudioError::INVALID_USE;
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Invalid device index.";
-    goto Exit;
-  }
-
-  // determine whether index falls within capture or render devices
-  if ( device >= renderDeviceCount ) {
-    if ( mode != INPUT ) {
-      errorType = RtAudioError::INVALID_USE;
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Capture device selected as output device.";
-      goto Exit;
-    }
-
-    // retrieve captureAudioClient from devicePtr
-    IAudioClient*& captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
-
-    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device handle.";
-      goto Exit;
-    }
-
-    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
-                              NULL, ( void** ) &captureAudioClient );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
-      goto Exit;
-    }
-
-    hr = captureAudioClient->GetMixFormat( &deviceFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
-    captureAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] );
-  }
-  else {
-    if ( mode != OUTPUT ) {
-      errorType = RtAudioError::INVALID_USE;
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Render device selected as input device.";
-      goto Exit;
-    }
-
-    // retrieve renderAudioClient from devicePtr
-    IAudioClient*& renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
-
-    hr = renderDevices->Item( device, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device handle.";
-      goto Exit;
-    }
-
-    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
-                              NULL, ( void** ) &renderAudioClient );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
-      goto Exit;
-    }
-
-    hr = renderAudioClient->GetMixFormat( &deviceFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
-    renderAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] );
-  }
-
-  // fill stream data
-  if ( ( stream_.mode == OUTPUT && mode == INPUT ) ||
-       ( stream_.mode == INPUT && mode == OUTPUT ) ) {
-    stream_.mode = DUPLEX;
-  }
-  else {
-    stream_.mode = mode;
-  }
-
-  stream_.device[mode] = device;
-  stream_.doByteSwap[mode] = false;
-  stream_.sampleRate = sampleRate;
-  stream_.bufferSize = *bufferSize;
-  stream_.nBuffers = 1;
-  stream_.nUserChannels[mode] = channels;
-  stream_.channelOffset[mode] = firstChannel;
-  stream_.userFormat = format;
-  stream_.deviceFormat[mode] = getDeviceInfo( device ).nativeFormats;
-
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
-    stream_.userInterleaved = false;
-  else
-    stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] = true;
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] ||
-       stream_.nUserChannels != stream_.nDeviceChannels )
-    stream_.doConvertBuffer[mode] = true;
-  else if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-            stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  if ( stream_.doConvertBuffer[mode] )
-    setConvertInfo( mode, 0 );
-
-  // Allocate necessary internal buffers
-  bufferBytes = stream_.nUserChannels[mode] * stream_.bufferSize * formatBytes( stream_.userFormat );
-
-  stream_.userBuffer[mode] = ( char* ) calloc( bufferBytes, 1 );
-  if ( !stream_.userBuffer[mode] ) {
-    errorType = RtAudioError::MEMORY_ERROR;
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Error allocating user buffer memory.";
-    goto Exit;
-  }
-
-  if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME )
-    stream_.callbackInfo.priority = 15;
-  else
-    stream_.callbackInfo.priority = 0;
-
-  ///! TODO: RTAUDIO_MINIMIZE_LATENCY // Provide stream buffers directly to callback
-  ///! TODO: RTAUDIO_HOG_DEVICE       // Exclusive mode
-
-  methodResult = SUCCESS;
-
-Exit:
-  //clean up
-  SAFE_RELEASE( captureDevices );
-  SAFE_RELEASE( renderDevices );
-  SAFE_RELEASE( devicePtr );
-  CoTaskMemFree( deviceFormat );
-
-  // if method failed, close the stream
-  if ( methodResult == FAILURE )
-    closeStream();
-
-  if ( !errorText_.empty() )
-    error( errorType );
-  return methodResult;
-}
-
-//=============================================================================
-
-DWORD WINAPI RtApiWasapi::runWasapiThread( void* wasapiPtr )
-{
-  if ( wasapiPtr )
-    ( ( RtApiWasapi* ) wasapiPtr )->wasapiThread();
-
-  return 0;
-}
-
-DWORD WINAPI RtApiWasapi::stopWasapiThread( void* wasapiPtr )
-{
-  if ( wasapiPtr )
-    ( ( RtApiWasapi* ) wasapiPtr )->stopStream();
-
-  return 0;
-}
-
-DWORD WINAPI RtApiWasapi::abortWasapiThread( void* wasapiPtr )
-{
-  if ( wasapiPtr )
-    ( ( RtApiWasapi* ) wasapiPtr )->abortStream();
-
-  return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-void RtApiWasapi::wasapiThread()
-{
-  // as this is a new thread, we must CoInitialize it
-  CoInitialize( NULL );
-
-  HRESULT hr;
-
-  IAudioClient* captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
-  IAudioClient* renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
-  IAudioCaptureClient* captureClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureClient;
-  IAudioRenderClient* renderClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderClient;
-  HANDLE captureEvent = ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent;
-  HANDLE renderEvent = ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent;
-
-  WAVEFORMATEX* captureFormat = NULL;
-  WAVEFORMATEX* renderFormat = NULL;
-  float captureSrRatio = 0.0f;
-  float renderSrRatio = 0.0f;
-  WasapiBuffer captureBuffer;
-  WasapiBuffer renderBuffer;
-
-  // declare local stream variables
-  RtAudioCallback callback = ( RtAudioCallback ) stream_.callbackInfo.callback;
-  BYTE* streamBuffer = NULL;
-  unsigned long captureFlags = 0;
-  unsigned int bufferFrameCount = 0;
-  unsigned int numFramesPadding = 0;
-  unsigned int convBufferSize = 0;
-  bool callbackPushed = false;
-  bool callbackPulled = false;
-  bool callbackStopped = false;
-  int callbackResult = 0;
-
-  // convBuffer is used to store converted buffers between WASAPI and the user
-  char* convBuffer = NULL;
-  unsigned int convBuffSize = 0;
-  unsigned int deviceBuffSize = 0;
-
-  errorText_.clear();
-  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
-
-  // Attempt to assign "Pro Audio" characteristic to thread
-  HMODULE AvrtDll = LoadLibrary( (LPCTSTR) "AVRT.dll" );
-  if ( AvrtDll ) {
-    DWORD taskIndex = 0;
-    TAvSetMmThreadCharacteristicsPtr AvSetMmThreadCharacteristicsPtr = ( TAvSetMmThreadCharacteristicsPtr ) GetProcAddress( AvrtDll, "AvSetMmThreadCharacteristicsW" );
-    AvSetMmThreadCharacteristicsPtr( L"Pro Audio", &taskIndex );
-    FreeLibrary( AvrtDll );
-  }
-
-  // start capture stream if applicable
-  if ( captureAudioClient ) {
-    hr = captureAudioClient->GetMixFormat( &captureFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    captureSrRatio = ( ( float ) captureFormat->nSamplesPerSec / stream_.sampleRate );
-
-    // initialize capture stream according to desire buffer size
-    float desiredBufferSize = stream_.bufferSize * captureSrRatio;
-    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / captureFormat->nSamplesPerSec );
-
-    if ( !captureClient ) {
-      hr = captureAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
-                                           AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
-                                           desiredBufferPeriod,
-                                           desiredBufferPeriod,
-                                           captureFormat,
-                                           NULL );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize capture audio client.";
-        goto Exit;
-      }
-
-      hr = captureAudioClient->GetService( __uuidof( IAudioCaptureClient ),
-                                           ( void** ) &captureClient );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture client handle.";
-        goto Exit;
-      }
-
-      // configure captureEvent to trigger on every available capture buffer
-      captureEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
-      if ( !captureEvent ) {
-        errorType = RtAudioError::SYSTEM_ERROR;
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to create capture event.";
-        goto Exit;
-      }
-
-      hr = captureAudioClient->SetEventHandle( captureEvent );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to set capture event handle.";
-        goto Exit;
-      }
-
-      ( ( WasapiHandle* ) stream_.apiHandle )->captureClient = captureClient;
-      ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent = captureEvent;
-    }
-
-    unsigned int inBufferSize = 0;
-    hr = captureAudioClient->GetBufferSize( &inBufferSize );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to get capture buffer size.";
-      goto Exit;
-    }
-
-    // scale outBufferSize according to stream->user sample rate ratio
-    unsigned int outBufferSize = ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT];
-    inBufferSize *= stream_.nDeviceChannels[INPUT];
-
-    // set captureBuffer size
-    captureBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[INPUT] ) );
-
-    // reset the capture stream
-    hr = captureAudioClient->Reset();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset capture stream.";
-      goto Exit;
-    }
-
-    // start the capture stream
-    hr = captureAudioClient->Start();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to start capture stream.";
-      goto Exit;
-    }
-  }
-
-  // start render stream if applicable
-  if ( renderAudioClient ) {
-    hr = renderAudioClient->GetMixFormat( &renderFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    renderSrRatio = ( ( float ) renderFormat->nSamplesPerSec / stream_.sampleRate );
-
-    // initialize render stream according to desire buffer size
-    float desiredBufferSize = stream_.bufferSize * renderSrRatio;
-    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / renderFormat->nSamplesPerSec );
-
-    if ( !renderClient ) {
-      hr = renderAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
-                                          AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
-                                          desiredBufferPeriod,
-                                          desiredBufferPeriod,
-                                          renderFormat,
-                                          NULL );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize render audio client.";
-        goto Exit;
-      }
-
-      hr = renderAudioClient->GetService( __uuidof( IAudioRenderClient ),
-                                          ( void** ) &renderClient );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render client handle.";
-        goto Exit;
-      }
-
-      // configure renderEvent to trigger on every available render buffer
-      renderEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
-      if ( !renderEvent ) {
-        errorType = RtAudioError::SYSTEM_ERROR;
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to create render event.";
-        goto Exit;
-      }
-
-      hr = renderAudioClient->SetEventHandle( renderEvent );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to set render event handle.";
-        goto Exit;
-      }
-
-      ( ( WasapiHandle* ) stream_.apiHandle )->renderClient = renderClient;
-      ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent = renderEvent;
-    }
-
-    unsigned int outBufferSize = 0;
-    hr = renderAudioClient->GetBufferSize( &outBufferSize );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to get render buffer size.";
-      goto Exit;
-    }
-
-    // scale inBufferSize according to user->stream sample rate ratio
-    unsigned int inBufferSize = ( unsigned int ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT];
-    outBufferSize *= stream_.nDeviceChannels[OUTPUT];
-
-    // set renderBuffer size
-    renderBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[OUTPUT] ) );
-
-    // reset the render stream
-    hr = renderAudioClient->Reset();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset render stream.";
-      goto Exit;
-    }
-
-    // start the render stream
-    hr = renderAudioClient->Start();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to start render stream.";
-      goto Exit;
-    }
-  }
-
-  if ( stream_.mode == INPUT ) {
-    convBuffSize = ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
-    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
-  }
-  else if ( stream_.mode == OUTPUT ) {
-    convBuffSize = ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
-    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
-  }
-  else if ( stream_.mode == DUPLEX ) {
-    convBuffSize = std::max( ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
-                             ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
-    deviceBuffSize = std::max( stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
-                               stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
-  }
-
-  convBuffer = ( char* ) malloc( convBuffSize );
-  stream_.deviceBuffer = ( char* ) malloc( deviceBuffSize );
-  if ( !convBuffer || !stream_.deviceBuffer ) {
-    errorType = RtAudioError::MEMORY_ERROR;
-    errorText_ = "RtApiWasapi::wasapiThread: Error allocating device buffer memory.";
-    goto Exit;
-  }
-
-  // stream process loop
-  while ( stream_.state != STREAM_STOPPING ) {
-    if ( !callbackPulled ) {
-      // Callback Input
-      // ==============
-      // 1. Pull callback buffer from inputBuffer
-      // 2. If 1. was successful: Convert callback buffer to user sample rate and channel count
-      //                          Convert callback buffer to user format
-
-      if ( captureAudioClient ) {
-        // Pull callback buffer from inputBuffer
-        callbackPulled = captureBuffer.pullBuffer( convBuffer,
-                                                   ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT],
-                                                   stream_.deviceFormat[INPUT] );
-
-        if ( callbackPulled ) {
-          // Convert callback buffer to user sample rate
-          convertBufferWasapi( stream_.deviceBuffer,
-                               convBuffer,
-                               stream_.nDeviceChannels[INPUT],
-                               captureFormat->nSamplesPerSec,
-                               stream_.sampleRate,
-                               ( unsigned int ) ( stream_.bufferSize * captureSrRatio ),
-                               convBufferSize,
-                               stream_.deviceFormat[INPUT] );
-
-          if ( stream_.doConvertBuffer[INPUT] ) {
-            // Convert callback buffer to user format
-            convertBuffer( stream_.userBuffer[INPUT],
-                           stream_.deviceBuffer,
-                           stream_.convertInfo[INPUT] );
-          }
-          else {
-            // no further conversion, simple copy deviceBuffer to userBuffer
-            memcpy( stream_.userBuffer[INPUT],
-                    stream_.deviceBuffer,
-                    stream_.bufferSize * stream_.nUserChannels[INPUT] * formatBytes( stream_.userFormat ) );
-          }
-        }
-      }
-      else {
-        // if there is no capture stream, set callbackPulled flag
-        callbackPulled = true;
-      }
-
-      // Execute Callback
-      // ================
-      // 1. Execute user callback method
-      // 2. Handle return value from callback
-
-      // if callback has not requested the stream to stop
-      if ( callbackPulled && !callbackStopped ) {
-        // Execute user callback method
-        callbackResult = callback( stream_.userBuffer[OUTPUT],
-                                   stream_.userBuffer[INPUT],
-                                   stream_.bufferSize,
-                                   getStreamTime(),
-                                   captureFlags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY ? RTAUDIO_INPUT_OVERFLOW : 0,
-                                   stream_.callbackInfo.userData );
-
-        // Handle return value from callback
-        if ( callbackResult == 1 ) {
-          // instantiate a thread to stop this thread
-          HANDLE threadHandle = CreateThread( NULL, 0, stopWasapiThread, this, 0, NULL );
-          if ( !threadHandle ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream stop thread.";
-            goto Exit;
-          }
-          else if ( !CloseHandle( threadHandle ) ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream stop thread handle.";
-            goto Exit;
-          }
-
-          callbackStopped = true;
-        }
-        else if ( callbackResult == 2 ) {
-          // instantiate a thread to stop this thread
-          HANDLE threadHandle = CreateThread( NULL, 0, abortWasapiThread, this, 0, NULL );
-          if ( !threadHandle ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream abort thread.";
-            goto Exit;
-          }
-          else if ( !CloseHandle( threadHandle ) ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream abort thread handle.";
-            goto Exit;
-          }
-
-          callbackStopped = true;
-        }
-      }
-    }
-
-    // Callback Output
-    // ===============
-    // 1. Convert callback buffer to stream format
-    // 2. Convert callback buffer to stream sample rate and channel count
-    // 3. Push callback buffer into outputBuffer
-
-    if ( renderAudioClient && callbackPulled ) {
-      if ( stream_.doConvertBuffer[OUTPUT] ) {
-        // Convert callback buffer to stream format
-        convertBuffer( stream_.deviceBuffer,
-                       stream_.userBuffer[OUTPUT],
-                       stream_.convertInfo[OUTPUT] );
-
-      }
-
-      // Convert callback buffer to stream sample rate
-      convertBufferWasapi( convBuffer,
-                           stream_.deviceBuffer,
-                           stream_.nDeviceChannels[OUTPUT],
-                           stream_.sampleRate,
-                           renderFormat->nSamplesPerSec,
-                           stream_.bufferSize,
-                           convBufferSize,
-                           stream_.deviceFormat[OUTPUT] );
-
-      // Push callback buffer into outputBuffer
-      callbackPushed = renderBuffer.pushBuffer( convBuffer,
-                                                convBufferSize * stream_.nDeviceChannels[OUTPUT],
-                                                stream_.deviceFormat[OUTPUT] );
-    }
-    else {
-      // if there is no render stream, set callbackPushed flag
-      callbackPushed = true;
-    }
-
-    // Stream Capture
-    // ==============
-    // 1. Get capture buffer from stream
-    // 2. Push capture buffer into inputBuffer
-    // 3. If 2. was successful: Release capture buffer
-
-    if ( captureAudioClient ) {
-      // if the callback input buffer was not pulled from captureBuffer, wait for next capture event
-      if ( !callbackPulled ) {
-        WaitForSingleObject( captureEvent, INFINITE );
-      }
-
-      // Get capture buffer from stream
-      hr = captureClient->GetBuffer( &streamBuffer,
-                                     &bufferFrameCount,
-                                     &captureFlags, NULL, NULL );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture buffer.";
-        goto Exit;
-      }
-
-      if ( bufferFrameCount != 0 ) {
-        // Push capture buffer into inputBuffer
-        if ( captureBuffer.pushBuffer( ( char* ) streamBuffer,
-                                       bufferFrameCount * stream_.nDeviceChannels[INPUT],
-                                       stream_.deviceFormat[INPUT] ) )
-        {
-          // Release capture buffer
-          hr = captureClient->ReleaseBuffer( bufferFrameCount );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
-            goto Exit;
-          }
-        }
-        else
-        {
-          // Inform WASAPI that capture was unsuccessful
-          hr = captureClient->ReleaseBuffer( 0 );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
-            goto Exit;
-          }
-        }
-      }
-      else
-      {
-        // Inform WASAPI that capture was unsuccessful
-        hr = captureClient->ReleaseBuffer( 0 );
-        if ( FAILED( hr ) ) {
-          errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
-          goto Exit;
-        }
-      }
-    }
-
-    // Stream Render
-    // =============
-    // 1. Get render buffer from stream
-    // 2. Pull next buffer from outputBuffer
-    // 3. If 2. was successful: Fill render buffer with next buffer
-    //                          Release render buffer
-
-    if ( renderAudioClient ) {
-      // if the callback output buffer was not pushed to renderBuffer, wait for next render event
-      if ( callbackPulled && !callbackPushed ) {
-        WaitForSingleObject( renderEvent, INFINITE );
-      }
-
-      // Get render buffer from stream
-      hr = renderAudioClient->GetBufferSize( &bufferFrameCount );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer size.";
-        goto Exit;
-      }
-
-      hr = renderAudioClient->GetCurrentPadding( &numFramesPadding );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer padding.";
-        goto Exit;
-      }
-
-      bufferFrameCount -= numFramesPadding;
-
-      if ( bufferFrameCount != 0 ) {
-        hr = renderClient->GetBuffer( bufferFrameCount, &streamBuffer );
-        if ( FAILED( hr ) ) {
-          errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer.";
-          goto Exit;
-        }
-
-        // Pull next buffer from outputBuffer
-        // Fill render buffer with next buffer
-        if ( renderBuffer.pullBuffer( ( char* ) streamBuffer,
-                                      bufferFrameCount * stream_.nDeviceChannels[OUTPUT],
-                                      stream_.deviceFormat[OUTPUT] ) )
-        {
-          // Release render buffer
-          hr = renderClient->ReleaseBuffer( bufferFrameCount, 0 );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
-            goto Exit;
-          }
-        }
-        else
-        {
-          // Inform WASAPI that render was unsuccessful
-          hr = renderClient->ReleaseBuffer( 0, 0 );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
-            goto Exit;
-          }
-        }
-      }
-      else
-      {
-        // Inform WASAPI that render was unsuccessful
-        hr = renderClient->ReleaseBuffer( 0, 0 );
-        if ( FAILED( hr ) ) {
-          errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
-          goto Exit;
-        }
-      }
-    }
-
-    // if the callback buffer was pushed renderBuffer reset callbackPulled flag
-    if ( callbackPushed ) {
-      callbackPulled = false;
-    }
-
-    // tick stream time
-    RtApi::tickStreamTime();
-  }
-
-Exit:
-  // clean up
-  CoTaskMemFree( captureFormat );
-  CoTaskMemFree( renderFormat );
-
-  free ( convBuffer );
-
-  CoUninitialize();
-
-  // update stream state
-  stream_.state = STREAM_STOPPED;
-
-  if ( errorText_.empty() )
-    return;
-  else
-    error( errorType );
-}
-
-//******************** End of __WINDOWS_WASAPI__ *********************//
-#endif
-
-
-#if defined(__WINDOWS_DS__) // Windows DirectSound API
-
-// Modified by Robin Davies, October 2005
-// - Improvements to DirectX pointer chasing. 
-// - Bug fix for non-power-of-two Asio granularity used by Edirol PCR-A30.
-// - Auto-call CoInitialize for DSOUND and ASIO platforms.
-// Various revisions for RtAudio 4.0 by Gary Scavone, April 2007
-// Changed device query structure for RtAudio 4.0.7, January 2010
-
-#include <dsound.h>
-#include <assert.h>
-#include <algorithm>
-
-#if defined(__MINGW32__)
-  // missing from latest mingw winapi
-#define WAVE_FORMAT_96M08 0x00010000 /* 96 kHz, Mono, 8-bit */
-#define WAVE_FORMAT_96S08 0x00020000 /* 96 kHz, Stereo, 8-bit */
-#define WAVE_FORMAT_96M16 0x00040000 /* 96 kHz, Mono, 16-bit */
-#define WAVE_FORMAT_96S16 0x00080000 /* 96 kHz, Stereo, 16-bit */
-#endif
-
-#define MINIMUM_DEVICE_BUFFER_SIZE 32768
-
-#ifdef _MSC_VER // if Microsoft Visual C++
-#pragma comment( lib, "winmm.lib" ) // then, auto-link winmm.lib. Otherwise, it has to be added manually.
-#endif
-
-static inline DWORD dsPointerBetween( DWORD pointer, DWORD laterPointer, DWORD earlierPointer, DWORD bufferSize )
-{
-  if ( pointer > bufferSize ) pointer -= bufferSize;
-  if ( laterPointer < earlierPointer ) laterPointer += bufferSize;
-  if ( pointer < earlierPointer ) pointer += bufferSize;
-  return pointer >= earlierPointer && pointer < laterPointer;
-}
-
-// A structure to hold various information related to the DirectSound
-// API implementation.
-struct DsHandle {
-  unsigned int drainCounter; // Tracks callback counts when draining
-  bool internalDrain;        // Indicates if stop is initiated from callback or not.
-  void *id[2];
-  void *buffer[2];
-  bool xrun[2];
-  UINT bufferPointer[2];  
-  DWORD dsBufferSize[2];
-  DWORD dsPointerLeadTime[2]; // the number of bytes ahead of the safe pointer to lead by.
-  HANDLE condition;
-
-  DsHandle()
-    :drainCounter(0), internalDrain(false) { id[0] = 0; id[1] = 0; buffer[0] = 0; buffer[1] = 0; xrun[0] = false; xrun[1] = false; bufferPointer[0] = 0; bufferPointer[1] = 0; }
-};
-
-// Declarations for utility functions, callbacks, and structures
-// specific to the DirectSound implementation.
-static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
-                                          LPCTSTR description,
-                                          LPCTSTR module,
-                                          LPVOID lpContext );
-
-static const char* getErrorString( int code );
-
-static unsigned __stdcall callbackHandler( void *ptr );
-
-struct DsDevice {
-  LPGUID id[2];
-  bool validId[2];
-  bool found;
-  std::string name;
-
-  DsDevice()
-  : found(false) { validId[0] = false; validId[1] = false; }
-};
-
-struct DsProbeData {
-  bool isInput;
-  std::vector<struct DsDevice>* dsDevices;
-};
-
-RtApiDs :: RtApiDs()
-{
-  // Dsound will run both-threaded. If CoInitialize fails, then just
-  // accept whatever the mainline chose for a threading model.
-  coInitialized_ = false;
-  HRESULT hr = CoInitialize( NULL );
-  if ( !FAILED( hr ) ) coInitialized_ = true;
-}
-
-RtApiDs :: ~RtApiDs()
-{
-  if ( coInitialized_ ) CoUninitialize(); // balanced call.
-  if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-// The DirectSound default output is always the first device.
-unsigned int RtApiDs :: getDefaultOutputDevice( void )
-{
-  return 0;
-}
-
-// The DirectSound default input is always the first input device,
-// which is the first capture device enumerated.
-unsigned int RtApiDs :: getDefaultInputDevice( void )
-{
-  return 0;
-}
-
-unsigned int RtApiDs :: getDeviceCount( void )
-{
-  // Set query flag for previously found devices to false, so that we
-  // can check for any devices that have disappeared.
-  for ( unsigned int i=0; i<dsDevices.size(); i++ )
-    dsDevices[i].found = false;
-
-  // Query DirectSound devices.
-  struct DsProbeData probeInfo;
-  probeInfo.isInput = false;
-  probeInfo.dsDevices = &dsDevices;
-  HRESULT result = DirectSoundEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
-  if ( FAILED( result ) ) {
-    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating output devices!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-  }
-
-  // Query DirectSoundCapture devices.
-  probeInfo.isInput = true;
-  result = DirectSoundCaptureEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
-  if ( FAILED( result ) ) {
-    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating input devices!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-  }
-
-  // Clean out any devices that may have disappeared (code update submitted by Eli Zehngut).
-  for ( unsigned int i=0; i<dsDevices.size(); ) {
-    if ( dsDevices[i].found == false ) dsDevices.erase( dsDevices.begin() + i );
-    else i++;
-  }
-
-  return static_cast<unsigned int>(dsDevices.size());
-}
-
-RtAudio::DeviceInfo RtApiDs :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  if ( dsDevices.size() == 0 ) {
-    // Force a query of all devices
-    getDeviceCount();
-    if ( dsDevices.size() == 0 ) {
-      errorText_ = "RtApiDs::getDeviceInfo: no devices found!";
-      error( RtAudioError::INVALID_USE );
-      return info;
-    }
-  }
-
-  if ( device >= dsDevices.size() ) {
-    errorText_ = "RtApiDs::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  HRESULT result;
-  if ( dsDevices[ device ].validId[0] == false ) goto probeInput;
-
-  LPDIRECTSOUND output;
-  DSCAPS outCaps;
-  result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
-  if ( FAILED( result ) ) {
-    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    goto probeInput;
-  }
-
-  outCaps.dwSize = sizeof( outCaps );
-  result = output->GetCaps( &outCaps );
-  if ( FAILED( result ) ) {
-    output->Release();
-    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting capabilities!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    goto probeInput;
-  }
-
-  // Get output channel information.
-  info.outputChannels = ( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ? 2 : 1;
-
-  // Get sample rate information.
-  info.sampleRates.clear();
-  for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-    if ( SAMPLE_RATES[k] >= (unsigned int) outCaps.dwMinSecondarySampleRate &&
-         SAMPLE_RATES[k] <= (unsigned int) outCaps.dwMaxSecondarySampleRate ) {
-      info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-      if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-        info.preferredSampleRate = SAMPLE_RATES[k];
-    }
-  }
-
-  // Get format information.
-  if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT ) info.nativeFormats |= RTAUDIO_SINT16;
-  if ( outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) info.nativeFormats |= RTAUDIO_SINT8;
-
-  output->Release();
-
-  if ( getDefaultOutputDevice() == device )
-    info.isDefaultOutput = true;
-
-  if ( dsDevices[ device ].validId[1] == false ) {
-    info.name = dsDevices[ device ].name;
-    info.probed = true;
-    return info;
-  }
-
- probeInput:
-
-  LPDIRECTSOUNDCAPTURE input;
-  result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
-  if ( FAILED( result ) ) {
-    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  DSCCAPS inCaps;
-  inCaps.dwSize = sizeof( inCaps );
-  result = input->GetCaps( &inCaps );
-  if ( FAILED( result ) ) {
-    input->Release();
-    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting object capabilities (" << dsDevices[ device ].name << ")!";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Get input channel information.
-  info.inputChannels = inCaps.dwChannels;
-
-  // Get sample rate and format information.
-  std::vector<unsigned int> rates;
-  if ( inCaps.dwChannels >= 2 ) {
-    if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) info.nativeFormats |= RTAUDIO_SINT8;
-    if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) info.nativeFormats |= RTAUDIO_SINT8;
-    if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) info.nativeFormats |= RTAUDIO_SINT8;
-    if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) info.nativeFormats |= RTAUDIO_SINT8;
-
-    if ( info.nativeFormats & RTAUDIO_SINT16 ) {
-      if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) rates.push_back( 11025 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) rates.push_back( 22050 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) rates.push_back( 44100 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) rates.push_back( 96000 );
-    }
-    else if ( info.nativeFormats & RTAUDIO_SINT8 ) {
-      if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) rates.push_back( 11025 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) rates.push_back( 22050 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) rates.push_back( 44100 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) rates.push_back( 96000 );
-    }
-  }
-  else if ( inCaps.dwChannels == 1 ) {
-    if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) info.nativeFormats |= RTAUDIO_SINT16;
-    if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) info.nativeFormats |= RTAUDIO_SINT8;
-    if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) info.nativeFormats |= RTAUDIO_SINT8;
-    if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) info.nativeFormats |= RTAUDIO_SINT8;
-    if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) info.nativeFormats |= RTAUDIO_SINT8;
-
-    if ( info.nativeFormats & RTAUDIO_SINT16 ) {
-      if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) rates.push_back( 11025 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) rates.push_back( 22050 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) rates.push_back( 44100 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) rates.push_back( 96000 );
-    }
-    else if ( info.nativeFormats & RTAUDIO_SINT8 ) {
-      if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) rates.push_back( 11025 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) rates.push_back( 22050 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) rates.push_back( 44100 );
-      if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) rates.push_back( 96000 );
-    }
-  }
-  else info.inputChannels = 0; // technically, this would be an error
-
-  input->Release();
-
-  if ( info.inputChannels == 0 ) return info;
-
-  // Copy the supported rates to the info structure but avoid duplication.
-  bool found;
-  for ( unsigned int i=0; i<rates.size(); i++ ) {
-    found = false;
-    for ( unsigned int j=0; j<info.sampleRates.size(); j++ ) {
-      if ( rates[i] == info.sampleRates[j] ) {
-        found = true;
-        break;
-      }
-    }
-    if ( found == false ) info.sampleRates.push_back( rates[i] );
-  }
-  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
-
-  // If device opens for both playback and capture, we determine the channels.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  if ( device == 0 ) info.isDefaultInput = true;
-
-  // Copy name and return.
-  info.name = dsDevices[ device ].name;
-  info.probed = true;
-  return info;
-}
-
-bool RtApiDs :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                 unsigned int firstChannel, unsigned int sampleRate,
-                                 RtAudioFormat format, unsigned int *bufferSize,
-                                 RtAudio::StreamOptions *options )
-{
-  if ( channels + firstChannel > 2 ) {
-    errorText_ = "RtApiDs::probeDeviceOpen: DirectSound does not support more than 2 channels per device.";
-    return FAILURE;
-  }
-
-  size_t nDevices = dsDevices.size();
-  if ( nDevices == 0 ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiDs::probeDeviceOpen: no devices found!";
-    return FAILURE;
-  }
-
-  if ( device >= nDevices ) {
-    // This should not happen because a check is made before this function is called.
-    errorText_ = "RtApiDs::probeDeviceOpen: device ID is invalid!";
-    return FAILURE;
-  }
-
-  if ( mode == OUTPUT ) {
-    if ( dsDevices[ device ].validId[0] == false ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support output!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-  else { // mode == INPUT
-    if ( dsDevices[ device ].validId[1] == false ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support input!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // According to a note in PortAudio, using GetDesktopWindow()
-  // instead of GetForegroundWindow() is supposed to avoid problems
-  // that occur when the application's window is not the foreground
-  // window.  Also, if the application window closes before the
-  // DirectSound buffer, DirectSound can crash.  In the past, I had
-  // problems when using GetDesktopWindow() but it seems fine now
-  // (January 2010).  I'll leave it commented here.
-  // HWND hWnd = GetForegroundWindow();
-  HWND hWnd = GetDesktopWindow();
-
-  // Check the numberOfBuffers parameter and limit the lowest value to
-  // two.  This is a judgement call and a value of two is probably too
-  // low for capture, but it should work for playback.
-  int nBuffers = 0;
-  if ( options ) nBuffers = options->numberOfBuffers;
-  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) nBuffers = 2;
-  if ( nBuffers < 2 ) nBuffers = 3;
-
-  // Check the lower range of the user-specified buffer size and set
-  // (arbitrarily) to a lower bound of 32.
-  if ( *bufferSize < 32 ) *bufferSize = 32;
-
-  // Create the wave format structure.  The data format setting will
-  // be determined later.
-  WAVEFORMATEX waveFormat;
-  ZeroMemory( &waveFormat, sizeof(WAVEFORMATEX) );
-  waveFormat.wFormatTag = WAVE_FORMAT_PCM;
-  waveFormat.nChannels = channels + firstChannel;
-  waveFormat.nSamplesPerSec = (unsigned long) sampleRate;
-
-  // Determine the device buffer size. By default, we'll use the value
-  // defined above (32K), but we will grow it to make allowances for
-  // very large software buffer sizes.
-  DWORD dsBufferSize = MINIMUM_DEVICE_BUFFER_SIZE;
-  DWORD dsPointerLeadTime = 0;
-
-  void *ohandle = 0, *bhandle = 0;
-  HRESULT result;
-  if ( mode == OUTPUT ) {
-
-    LPDIRECTSOUND output;
-    result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    DSCAPS outCaps;
-    outCaps.dwSize = sizeof( outCaps );
-    result = output->GetCaps( &outCaps );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting capabilities (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Check channel information.
-    if ( channels + firstChannel == 2 && !( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ) {
-      errorStream_ << "RtApiDs::getDeviceInfo: the output device (" << dsDevices[ device ].name << ") does not support stereo playback.";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Check format information.  Use 16-bit format unless not
-    // supported or user requests 8-bit.
-    if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT &&
-         !( format == RTAUDIO_SINT8 && outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) ) {
-      waveFormat.wBitsPerSample = 16;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-    }
-    else {
-      waveFormat.wBitsPerSample = 8;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-    }
-    stream_.userFormat = format;
-
-    // Update wave format structure and buffer information.
-    waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
-    waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
-    dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
-
-    // If the user wants an even bigger buffer, increase the device buffer size accordingly.
-    while ( dsPointerLeadTime * 2U > dsBufferSize )
-      dsBufferSize *= 2;
-
-    // Set cooperative level to DSSCL_EXCLUSIVE ... sound stops when window focus changes.
-    // result = output->SetCooperativeLevel( hWnd, DSSCL_EXCLUSIVE );
-    // Set cooperative level to DSSCL_PRIORITY ... sound remains when window focus changes.
-    result = output->SetCooperativeLevel( hWnd, DSSCL_PRIORITY );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting cooperative level (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Even though we will write to the secondary buffer, we need to
-    // access the primary buffer to set the correct output format
-    // (since the default is 8-bit, 22 kHz!).  Setup the DS primary
-    // buffer description.
-    DSBUFFERDESC bufferDescription;
-    ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
-    bufferDescription.dwSize = sizeof( DSBUFFERDESC );
-    bufferDescription.dwFlags = DSBCAPS_PRIMARYBUFFER;
-
-    // Obtain the primary buffer
-    LPDIRECTSOUNDBUFFER buffer;
-    result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") accessing primary buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Set the primary DS buffer sound format.
-    result = buffer->SetFormat( &waveFormat );
-    if ( FAILED( result ) ) {
-      output->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting primary buffer format (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Setup the secondary DS buffer description.
-    ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
-    bufferDescription.dwSize = sizeof( DSBUFFERDESC );
-    bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
-                                  DSBCAPS_GLOBALFOCUS |
-                                  DSBCAPS_GETCURRENTPOSITION2 |
-                                  DSBCAPS_LOCHARDWARE );  // Force hardware mixing
-    bufferDescription.dwBufferBytes = dsBufferSize;
-    bufferDescription.lpwfxFormat = &waveFormat;
-
-    // Try to create the secondary DS buffer.  If that doesn't work,
-    // try to use software mixing.  Otherwise, there's a problem.
-    result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
-    if ( FAILED( result ) ) {
-      bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
-                                    DSBCAPS_GLOBALFOCUS |
-                                    DSBCAPS_GETCURRENTPOSITION2 |
-                                    DSBCAPS_LOCSOFTWARE );  // Force software mixing
-      result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
-      if ( FAILED( result ) ) {
-        output->Release();
-        errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating secondary buffer (" << dsDevices[ device ].name << ")!";
-        errorText_ = errorStream_.str();
-        return FAILURE;
-      }
-    }
-
-    // Get the buffer size ... might be different from what we specified.
-    DSBCAPS dsbcaps;
-    dsbcaps.dwSize = sizeof( DSBCAPS );
-    result = buffer->GetCaps( &dsbcaps );
-    if ( FAILED( result ) ) {
-      output->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    dsBufferSize = dsbcaps.dwBufferBytes;
-
-    // Lock the DS buffer
-    LPVOID audioPtr;
-    DWORD dataLen;
-    result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      output->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Zero the DS buffer
-    ZeroMemory( audioPtr, dataLen );
-
-    // Unlock the DS buffer
-    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
-    if ( FAILED( result ) ) {
-      output->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    ohandle = (void *) output;
-    bhandle = (void *) buffer;
-  }
-
-  if ( mode == INPUT ) {
-
-    LPDIRECTSOUNDCAPTURE input;
-    result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    DSCCAPS inCaps;
-    inCaps.dwSize = sizeof( inCaps );
-    result = input->GetCaps( &inCaps );
-    if ( FAILED( result ) ) {
-      input->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting input capabilities (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Check channel information.
-    if ( inCaps.dwChannels < channels + firstChannel ) {
-      errorText_ = "RtApiDs::getDeviceInfo: the input device does not support requested input channels.";
-      return FAILURE;
-    }
-
-    // Check format information.  Use 16-bit format unless user
-    // requests 8-bit.
-    DWORD deviceFormats;
-    if ( channels + firstChannel == 2 ) {
-      deviceFormats = WAVE_FORMAT_1S08 | WAVE_FORMAT_2S08 | WAVE_FORMAT_4S08 | WAVE_FORMAT_96S08;
-      if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
-        waveFormat.wBitsPerSample = 8;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-      }
-      else { // assume 16-bit is supported
-        waveFormat.wBitsPerSample = 16;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-      }
-    }
-    else { // channel == 1
-      deviceFormats = WAVE_FORMAT_1M08 | WAVE_FORMAT_2M08 | WAVE_FORMAT_4M08 | WAVE_FORMAT_96M08;
-      if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
-        waveFormat.wBitsPerSample = 8;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-      }
-      else { // assume 16-bit is supported
-        waveFormat.wBitsPerSample = 16;
-        stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-      }
-    }
-    stream_.userFormat = format;
-
-    // Update wave format structure and buffer information.
-    waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
-    waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
-    dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
-
-    // If the user wants an even bigger buffer, increase the device buffer size accordingly.
-    while ( dsPointerLeadTime * 2U > dsBufferSize )
-      dsBufferSize *= 2;
-
-    // Setup the secondary DS buffer description.
-    DSCBUFFERDESC bufferDescription;
-    ZeroMemory( &bufferDescription, sizeof( DSCBUFFERDESC ) );
-    bufferDescription.dwSize = sizeof( DSCBUFFERDESC );
-    bufferDescription.dwFlags = 0;
-    bufferDescription.dwReserved = 0;
-    bufferDescription.dwBufferBytes = dsBufferSize;
-    bufferDescription.lpwfxFormat = &waveFormat;
-
-    // Create the capture buffer.
-    LPDIRECTSOUNDCAPTUREBUFFER buffer;
-    result = input->CreateCaptureBuffer( &bufferDescription, &buffer, NULL );
-    if ( FAILED( result ) ) {
-      input->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating input buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Get the buffer size ... might be different from what we specified.
-    DSCBCAPS dscbcaps;
-    dscbcaps.dwSize = sizeof( DSCBCAPS );
-    result = buffer->GetCaps( &dscbcaps );
-    if ( FAILED( result ) ) {
-      input->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    dsBufferSize = dscbcaps.dwBufferBytes;
-
-    // NOTE: We could have a problem here if this is a duplex stream
-    // and the play and capture hardware buffer sizes are different
-    // (I'm actually not sure if that is a problem or not).
-    // Currently, we are not verifying that.
-
-    // Lock the capture buffer
-    LPVOID audioPtr;
-    DWORD dataLen;
-    result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      input->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking input buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    // Zero the buffer
-    ZeroMemory( audioPtr, dataLen );
-
-    // Unlock the buffer
-    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
-    if ( FAILED( result ) ) {
-      input->Release();
-      buffer->Release();
-      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking input buffer (" << dsDevices[ device ].name << ")!";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-
-    ohandle = (void *) input;
-    bhandle = (void *) buffer;
-  }
-
-  // Set various stream parameters
-  DsHandle *handle = 0;
-  stream_.nDeviceChannels[mode] = channels + firstChannel;
-  stream_.nUserChannels[mode] = channels;
-  stream_.bufferSize = *bufferSize;
-  stream_.channelOffset[mode] = firstChannel;
-  stream_.deviceInterleaved[mode] = true;
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-
-  // Set flag for buffer conversion
-  stream_.doConvertBuffer[mode] = false;
-  if (stream_.nUserChannels[mode] != stream_.nDeviceChannels[mode])
-    stream_.doConvertBuffer[mode] = true;
-  if (stream_.userFormat != stream_.deviceFormat[mode])
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate necessary internal buffers
-  long bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiDs::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= (long) bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiDs::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  // Allocate our DsHandle structures for the stream.
-  if ( stream_.apiHandle == 0 ) {
-    try {
-      handle = new DsHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiDs::probeDeviceOpen: error allocating AsioHandle memory.";
-      goto error;
-    }
-
-    // Create a manual-reset event.
-    handle->condition = CreateEvent( NULL,   // no security
-                                     TRUE,   // manual-reset
-                                     FALSE,  // non-signaled initially
-                                     NULL ); // unnamed
-    stream_.apiHandle = (void *) handle;
-  }
-  else
-    handle = (DsHandle *) stream_.apiHandle;
-  handle->id[mode] = ohandle;
-  handle->buffer[mode] = bhandle;
-  handle->dsBufferSize[mode] = dsBufferSize;
-  handle->dsPointerLeadTime[mode] = dsPointerLeadTime;
-
-  stream_.device[mode] = device;
-  stream_.state = STREAM_STOPPED;
-  if ( stream_.mode == OUTPUT && mode == INPUT )
-    // We had already set up an output stream.
-    stream_.mode = DUPLEX;
-  else
-    stream_.mode = mode;
-  stream_.nBuffers = nBuffers;
-  stream_.sampleRate = sampleRate;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
-  // Setup the callback thread.
-  if ( stream_.callbackInfo.isRunning == false ) {
-    unsigned threadId;
-    stream_.callbackInfo.isRunning = true;
-    stream_.callbackInfo.object = (void *) this;
-    stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &callbackHandler,
-                                                  &stream_.callbackInfo, 0, &threadId );
-    if ( stream_.callbackInfo.thread == 0 ) {
-      errorText_ = "RtApiDs::probeDeviceOpen: error creating callback thread!";
-      goto error;
-    }
-
-    // Boost DS thread priority
-    SetThreadPriority( (HANDLE) stream_.callbackInfo.thread, THREAD_PRIORITY_HIGHEST );
-  }
-  return SUCCESS;
-
- error:
-  if ( handle ) {
-    if ( handle->buffer[0] ) { // the object pointer can be NULL and valid
-      LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
-      LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      if ( buffer ) buffer->Release();
-      object->Release();
-    }
-    if ( handle->buffer[1] ) {
-      LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
-      LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-      if ( buffer ) buffer->Release();
-      object->Release();
-    }
-    CloseHandle( handle->condition );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  return FAILURE;
-}
-
-void RtApiDs :: closeStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiDs::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Stop the callback thread.
-  stream_.callbackInfo.isRunning = false;
-  WaitForSingleObject( (HANDLE) stream_.callbackInfo.thread, INFINITE );
-  CloseHandle( (HANDLE) stream_.callbackInfo.thread );
-
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-  if ( handle ) {
-    if ( handle->buffer[0] ) { // the object pointer can be NULL and valid
-      LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
-      LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      if ( buffer ) {
-        buffer->Stop();
-        buffer->Release();
-      }
-      object->Release();
-    }
-    if ( handle->buffer[1] ) {
-      LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
-      LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-      if ( buffer ) {
-        buffer->Stop();
-        buffer->Release();
-      }
-      object->Release();
-    }
-    CloseHandle( handle->condition );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-void RtApiDs :: startStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiDs::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-
-  // Increase scheduler frequency on lesser windows (a side-effect of
-  // increasing timer accuracy).  On greater windows (Win2K or later),
-  // this is already in effect.
-  timeBeginPeriod( 1 ); 
-
-  buffersRolling = false;
-  duplexPrerollBytes = 0;
-
-  if ( stream_.mode == DUPLEX ) {
-    // 0.5 seconds of silence in DUPLEX mode while the devices spin up and synchronize.
-    duplexPrerollBytes = (int) ( 0.5 * stream_.sampleRate * formatBytes( stream_.deviceFormat[1] ) * stream_.nDeviceChannels[1] );
-  }
-
-  HRESULT result = 0;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-    result = buffer->Play( 0, 0, DSBPLAY_LOOPING );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-    result = buffer->Start( DSCBSTART_LOOPING );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  handle->drainCounter = 0;
-  handle->internalDrain = false;
-  ResetEvent( handle->condition );
-  stream_.state = STREAM_RUNNING;
-
- unlock:
-  if ( FAILED( result ) ) error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiDs :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiDs::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  HRESULT result = 0;
-  LPVOID audioPtr;
-  DWORD dataLen;
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    if ( handle->drainCounter == 0 ) {
-      handle->drainCounter = 2;
-      WaitForSingleObject( handle->condition, INFINITE );  // block until signaled
-    }
-
-    stream_.state = STREAM_STOPPED;
-
-    MUTEX_LOCK( &stream_.mutex );
-
-    // Stop the buffer and clear memory
-    LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-    result = buffer->Stop();
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") stopping output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Lock the buffer and clear it so that if we start to play again,
-    // we won't have old data playing.
-    result = buffer->Lock( 0, handle->dsBufferSize[0], &audioPtr, &dataLen, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Zero the DS buffer
-    ZeroMemory( audioPtr, dataLen );
-
-    // Unlock the DS buffer
-    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // If we start playing again, we must begin at beginning of buffer.
-    handle->bufferPointer[0] = 0;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-    LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-    audioPtr = NULL;
-    dataLen = 0;
-
-    stream_.state = STREAM_STOPPED;
-
-    if ( stream_.mode != DUPLEX )
-      MUTEX_LOCK( &stream_.mutex );
-
-    result = buffer->Stop();
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") stopping input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Lock the buffer and clear it so that if we start to play again,
-    // we won't have old data playing.
-    result = buffer->Lock( 0, handle->dsBufferSize[1], &audioPtr, &dataLen, NULL, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // Zero the DS buffer
-    ZeroMemory( audioPtr, dataLen );
-
-    // Unlock the DS buffer
-    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-
-    // If we start recording again, we must begin at beginning of buffer.
-    handle->bufferPointer[1] = 0;
-  }
-
- unlock:
-  timeEndPeriod( 1 ); // revert to normal scheduler frequency on lesser windows.
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( FAILED( result ) ) error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiDs :: abortStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiDs::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-  handle->drainCounter = 2;
-
-  stopStream();
-}
-
-void RtApiDs :: callbackEvent()
-{
-  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) {
-    Sleep( 50 ); // sleep 50 milliseconds
-    return;
-  }
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiDs::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-
-  // Check if we were draining the stream and signal is finished.
-  if ( handle->drainCounter > stream_.nBuffers + 2 ) {
-
-    stream_.state = STREAM_STOPPING;
-    if ( handle->internalDrain == false )
-      SetEvent( handle->condition );
-    else
-      stopStream();
-    return;
-  }
-
-  // Invoke user callback to get fresh output data UNLESS we are
-  // draining stream.
-  if ( handle->drainCounter == 0 ) {
-    RtAudioCallback callback = (RtAudioCallback) info->callback;
-    double streamTime = getStreamTime();
-    RtAudioStreamStatus status = 0;
-    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
-      status |= RTAUDIO_OUTPUT_UNDERFLOW;
-      handle->xrun[0] = false;
-    }
-    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
-      status |= RTAUDIO_INPUT_OVERFLOW;
-      handle->xrun[1] = false;
-    }
-    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                                  stream_.bufferSize, streamTime, status, info->userData );
-    if ( cbReturnValue == 2 ) {
-      stream_.state = STREAM_STOPPING;
-      handle->drainCounter = 2;
-      abortStream();
-      return;
-    }
-    else if ( cbReturnValue == 1 ) {
-      handle->drainCounter = 1;
-      handle->internalDrain = true;
-    }
-  }
-
-  HRESULT result;
-  DWORD currentWritePointer, safeWritePointer;
-  DWORD currentReadPointer, safeReadPointer;
-  UINT nextWritePointer;
-
-  LPVOID buffer1 = NULL;
-  LPVOID buffer2 = NULL;
-  DWORD bufferSize1 = 0;
-  DWORD bufferSize2 = 0;
-
-  char *buffer;
-  long bufferBytes;
-
-  MUTEX_LOCK( &stream_.mutex );
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_UNLOCK( &stream_.mutex );
-    return;
-  }
-
-  if ( buffersRolling == false ) {
-    if ( stream_.mode == DUPLEX ) {
-      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
-
-      // It takes a while for the devices to get rolling. As a result,
-      // there's no guarantee that the capture and write device pointers
-      // will move in lockstep.  Wait here for both devices to start
-      // rolling, and then set our buffer pointers accordingly.
-      // e.g. Crystal Drivers: the capture buffer starts up 5700 to 9600
-      // bytes later than the write buffer.
-
-      // Stub: a serious risk of having a pre-emptive scheduling round
-      // take place between the two GetCurrentPosition calls... but I'm
-      // really not sure how to solve the problem.  Temporarily boost to
-      // Realtime priority, maybe; but I'm not sure what priority the
-      // DirectSound service threads run at. We *should* be roughly
-      // within a ms or so of correct.
-
-      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      LPDIRECTSOUNDCAPTUREBUFFER dsCaptureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-
-      DWORD startSafeWritePointer, startSafeReadPointer;
-
-      result = dsWriteBuffer->GetCurrentPosition( NULL, &startSafeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      result = dsCaptureBuffer->GetCurrentPosition( NULL, &startSafeReadPointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      while ( true ) {
-        result = dsWriteBuffer->GetCurrentPosition( NULL, &safeWritePointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-        result = dsCaptureBuffer->GetCurrentPosition( NULL, &safeReadPointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-        if ( safeWritePointer != startSafeWritePointer && safeReadPointer != startSafeReadPointer ) break;
-        Sleep( 1 );
-      }
-
-      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
-
-      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
-      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
-      handle->bufferPointer[1] = safeReadPointer;
-    }
-    else if ( stream_.mode == OUTPUT ) {
-
-      // Set the proper nextWritePosition after initial startup.
-      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      result = dsWriteBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
-      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
-    }
-
-    buffersRolling = true;
-  }
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    
-    LPDIRECTSOUNDBUFFER dsBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-
-    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
-      bufferBytes *= formatBytes( stream_.userFormat );
-      memset( stream_.userBuffer[0], 0, bufferBytes );
-    }
-
-    // Setup parameters and do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[0] ) {
-      buffer = stream_.deviceBuffer;
-      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[0];
-      bufferBytes *= formatBytes( stream_.deviceFormat[0] );
-    }
-    else {
-      buffer = stream_.userBuffer[0];
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
-      bufferBytes *= formatBytes( stream_.userFormat );
-    }
-
-    // No byte swapping necessary in DirectSound implementation.
-
-    // Ahhh ... windoze.  16-bit data is signed but 8-bit data is
-    // unsigned.  So, we need to convert our signed 8-bit data here to
-    // unsigned.
-    if ( stream_.deviceFormat[0] == RTAUDIO_SINT8 )
-      for ( int i=0; i<bufferBytes; i++ ) buffer[i] = (unsigned char) ( buffer[i] + 128 );
-
-    DWORD dsBufferSize = handle->dsBufferSize[0];
-    nextWritePointer = handle->bufferPointer[0];
-
-    DWORD endWrite, leadPointer;
-    while ( true ) {
-      // Find out where the read and "safe write" pointers are.
-      result = dsBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-
-      // We will copy our output buffer into the region between
-      // safeWritePointer and leadPointer.  If leadPointer is not
-      // beyond the next endWrite position, wait until it is.
-      leadPointer = safeWritePointer + handle->dsPointerLeadTime[0];
-      //std::cout << "safeWritePointer = " << safeWritePointer << ", leadPointer = " << leadPointer << ", nextWritePointer = " << nextWritePointer << std::endl;
-      if ( leadPointer > dsBufferSize ) leadPointer -= dsBufferSize;
-      if ( leadPointer < nextWritePointer ) leadPointer += dsBufferSize; // unwrap offset
-      endWrite = nextWritePointer + bufferBytes;
-
-      // Check whether the entire write region is behind the play pointer.
-      if ( leadPointer >= endWrite ) break;
-
-      // If we are here, then we must wait until the leadPointer advances
-      // beyond the end of our next write region. We use the
-      // Sleep() function to suspend operation until that happens.
-      double millis = ( endWrite - leadPointer ) * 1000.0;
-      millis /= ( formatBytes( stream_.deviceFormat[0]) * stream_.nDeviceChannels[0] * stream_.sampleRate);
-      if ( millis < 1.0 ) millis = 1.0;
-      Sleep( (DWORD) millis );
-    }
-
-    if ( dsPointerBetween( nextWritePointer, safeWritePointer, currentWritePointer, dsBufferSize )
-         || dsPointerBetween( endWrite, safeWritePointer, currentWritePointer, dsBufferSize ) ) { 
-      // We've strayed into the forbidden zone ... resync the read pointer.
-      handle->xrun[0] = true;
-      nextWritePointer = safeWritePointer + handle->dsPointerLeadTime[0] - bufferBytes;
-      if ( nextWritePointer >= dsBufferSize ) nextWritePointer -= dsBufferSize;
-      handle->bufferPointer[0] = nextWritePointer;
-      endWrite = nextWritePointer + bufferBytes;
-    }
-
-    // Lock free space in the buffer
-    result = dsBuffer->Lock( nextWritePointer, bufferBytes, &buffer1,
-                             &bufferSize1, &buffer2, &bufferSize2, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking buffer during playback!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    // Copy our buffer into the DS buffer
-    CopyMemory( buffer1, buffer, bufferSize1 );
-    if ( buffer2 != NULL ) CopyMemory( buffer2, buffer+bufferSize1, bufferSize2 );
-
-    // Update our buffer offset and unlock sound buffer
-    dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking buffer during playback!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-    nextWritePointer = ( nextWritePointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
-    handle->bufferPointer[0] = nextWritePointer;
-  }
-
-  // Don't bother draining input
-  if ( handle->drainCounter ) {
-    handle->drainCounter++;
-    goto unlock;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    // Setup parameters.
-    if ( stream_.doConvertBuffer[1] ) {
-      buffer = stream_.deviceBuffer;
-      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[1];
-      bufferBytes *= formatBytes( stream_.deviceFormat[1] );
-    }
-    else {
-      buffer = stream_.userBuffer[1];
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[1];
-      bufferBytes *= formatBytes( stream_.userFormat );
-    }
-
-    LPDIRECTSOUNDCAPTUREBUFFER dsBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-    long nextReadPointer = handle->bufferPointer[1];
-    DWORD dsBufferSize = handle->dsBufferSize[1];
-
-    // Find out where the write and "safe read" pointers are.
-    result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
-    DWORD endRead = nextReadPointer + bufferBytes;
-
-    // Handling depends on whether we are INPUT or DUPLEX. 
-    // If we're in INPUT mode then waiting is a good thing. If we're in DUPLEX mode,
-    // then a wait here will drag the write pointers into the forbidden zone.
-    // 
-    // In DUPLEX mode, rather than wait, we will back off the read pointer until 
-    // it's in a safe position. This causes dropouts, but it seems to be the only 
-    // practical way to sync up the read and write pointers reliably, given the 
-    // the very complex relationship between phase and increment of the read and write 
-    // pointers.
-    //
-    // In order to minimize audible dropouts in DUPLEX mode, we will
-    // provide a pre-roll period of 0.5 seconds in which we return
-    // zeros from the read buffer while the pointers sync up.
-
-    if ( stream_.mode == DUPLEX ) {
-      if ( safeReadPointer < endRead ) {
-        if ( duplexPrerollBytes <= 0 ) {
-          // Pre-roll time over. Be more agressive.
-          int adjustment = endRead-safeReadPointer;
-
-          handle->xrun[1] = true;
-          // Two cases:
-          //   - large adjustments: we've probably run out of CPU cycles, so just resync exactly,
-          //     and perform fine adjustments later.
-          //   - small adjustments: back off by twice as much.
-          if ( adjustment >= 2*bufferBytes )
-            nextReadPointer = safeReadPointer-2*bufferBytes;
-          else
-            nextReadPointer = safeReadPointer-bufferBytes-adjustment;
-
-          if ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
-
-        }
-        else {
-          // In pre=roll time. Just do it.
-          nextReadPointer = safeReadPointer - bufferBytes;
-          while ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
-        }
-        endRead = nextReadPointer + bufferBytes;
-      }
-    }
-    else { // mode == INPUT
-      while ( safeReadPointer < endRead && stream_.callbackInfo.isRunning ) {
-        // See comments for playback.
-        double millis = (endRead - safeReadPointer) * 1000.0;
-        millis /= ( formatBytes(stream_.deviceFormat[1]) * stream_.nDeviceChannels[1] * stream_.sampleRate);
-        if ( millis < 1.0 ) millis = 1.0;
-        Sleep( (DWORD) millis );
-
-        // Wake up and find out where we are now.
-        result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-      
-        if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
-      }
-    }
-
-    // Lock free space in the buffer
-    result = dsBuffer->Lock( nextReadPointer, bufferBytes, &buffer1,
-                             &bufferSize1, &buffer2, &bufferSize2, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking capture buffer!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    if ( duplexPrerollBytes <= 0 ) {
-      // Copy our buffer into the DS buffer
-      CopyMemory( buffer, buffer1, bufferSize1 );
-      if ( buffer2 != NULL ) CopyMemory( buffer+bufferSize1, buffer2, bufferSize2 );
-    }
-    else {
-      memset( buffer, 0, bufferSize1 );
-      if ( buffer2 != NULL ) memset( buffer + bufferSize1, 0, bufferSize2 );
-      duplexPrerollBytes -= bufferSize1 + bufferSize2;
-    }
-
-    // Update our buffer offset and unlock sound buffer
-    nextReadPointer = ( nextReadPointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
-    dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking capture buffer!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-    handle->bufferPointer[1] = nextReadPointer;
-
-    // No byte swapping necessary in DirectSound implementation.
-
-    // If necessary, convert 8-bit data from unsigned to signed.
-    if ( stream_.deviceFormat[1] == RTAUDIO_SINT8 )
-      for ( int j=0; j<bufferBytes; j++ ) buffer[j] = (signed char) ( buffer[j] - 128 );
-
-    // Do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[1] )
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-  }
-
- unlock:
-  MUTEX_UNLOCK( &stream_.mutex );
-  RtApi::tickStreamTime();
-}
-
-// Definitions for utility functions and callbacks
-// specific to the DirectSound implementation.
-
-static unsigned __stdcall callbackHandler( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiDs *object = (RtApiDs *) info->object;
-  bool* isRunning = &info->isRunning;
-
-  while ( *isRunning == true ) {
-    object->callbackEvent();
-  }
-
-  _endthreadex( 0 );
-  return 0;
-}
-
-static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
-                                          LPCTSTR description,
-                                          LPCTSTR /*module*/,
-                                          LPVOID lpContext )
-{
-  struct DsProbeData& probeInfo = *(struct DsProbeData*) lpContext;
-  std::vector<struct DsDevice>& dsDevices = *probeInfo.dsDevices;
-
-  HRESULT hr;
-  bool validDevice = false;
-  if ( probeInfo.isInput == true ) {
-    DSCCAPS caps;
-    LPDIRECTSOUNDCAPTURE object;
-
-    hr = DirectSoundCaptureCreate(  lpguid, &object,   NULL );
-    if ( hr != DS_OK ) return TRUE;
-
-    caps.dwSize = sizeof(caps);
-    hr = object->GetCaps( &caps );
-    if ( hr == DS_OK ) {
-      if ( caps.dwChannels > 0 && caps.dwFormats > 0 )
-        validDevice = true;
-    }
-    object->Release();
-  }
-  else {
-    DSCAPS caps;
-    LPDIRECTSOUND object;
-    hr = DirectSoundCreate(  lpguid, &object,   NULL );
-    if ( hr != DS_OK ) return TRUE;
-
-    caps.dwSize = sizeof(caps);
-    hr = object->GetCaps( &caps );
-    if ( hr == DS_OK ) {
-      if ( caps.dwFlags & DSCAPS_PRIMARYMONO || caps.dwFlags & DSCAPS_PRIMARYSTEREO )
-        validDevice = true;
-    }
-    object->Release();
-  }
-
-  // If good device, then save its name and guid.
-  std::string name = convertCharPointerToStdString( description );
-  //if ( name == "Primary Sound Driver" || name == "Primary Sound Capture Driver" )
-  if ( lpguid == NULL )
-    name = "Default Device";
-  if ( validDevice ) {
-    for ( unsigned int i=0; i<dsDevices.size(); i++ ) {
-      if ( dsDevices[i].name == name ) {
-        dsDevices[i].found = true;
-        if ( probeInfo.isInput ) {
-          dsDevices[i].id[1] = lpguid;
-          dsDevices[i].validId[1] = true;
-        }
-        else {
-          dsDevices[i].id[0] = lpguid;
-          dsDevices[i].validId[0] = true;
-        }
-        return TRUE;
-      }
-    }
-
-    DsDevice device;
-    device.name = name;
-    device.found = true;
-    if ( probeInfo.isInput ) {
-      device.id[1] = lpguid;
-      device.validId[1] = true;
-    }
-    else {
-      device.id[0] = lpguid;
-      device.validId[0] = true;
-    }
-    dsDevices.push_back( device );
-  }
-
-  return TRUE;
-}
-
-static const char* getErrorString( int code )
-{
-  switch ( code ) {
-
-  case DSERR_ALLOCATED:
-    return "Already allocated";
-
-  case DSERR_CONTROLUNAVAIL:
-    return "Control unavailable";
-
-  case DSERR_INVALIDPARAM:
-    return "Invalid parameter";
-
-  case DSERR_INVALIDCALL:
-    return "Invalid call";
-
-  case DSERR_GENERIC:
-    return "Generic error";
-
-  case DSERR_PRIOLEVELNEEDED:
-    return "Priority level needed";
-
-  case DSERR_OUTOFMEMORY:
-    return "Out of memory";
-
-  case DSERR_BADFORMAT:
-    return "The sample rate or the channel format is not supported";
-
-  case DSERR_UNSUPPORTED:
-    return "Not supported";
-
-  case DSERR_NODRIVER:
-    return "No driver";
-
-  case DSERR_ALREADYINITIALIZED:
-    return "Already initialized";
-
-  case DSERR_NOAGGREGATION:
-    return "No aggregation";
-
-  case DSERR_BUFFERLOST:
-    return "Buffer lost";
-
-  case DSERR_OTHERAPPHASPRIO:
-    return "Another application already has priority";
-
-  case DSERR_UNINITIALIZED:
-    return "Uninitialized";
-
-  default:
-    return "DirectSound unknown error";
-  }
-}
-//******************** End of __WINDOWS_DS__ *********************//
-#endif
-
-
-#if defined(__LINUX_ALSA__)
-
-#include <alsa/asoundlib.h>
-#include <unistd.h>
-
-  // A structure to hold various information related to the ALSA API
-  // implementation.
-struct AlsaHandle {
-  snd_pcm_t *handles[2];
-  bool synchronized;
-  bool xrun[2];
-  pthread_cond_t runnable_cv;
-  bool runnable;
-
-  AlsaHandle()
-    :synchronized(false), runnable(false) { xrun[0] = false; xrun[1] = false; }
-};
-
-static void *alsaCallbackHandler( void * ptr );
-
-RtApiAlsa :: RtApiAlsa()
-{
-  // Nothing to do here.
-}
-
-RtApiAlsa :: ~RtApiAlsa()
-{
-  if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-unsigned int RtApiAlsa :: getDeviceCount( void )
-{
-  unsigned nDevices = 0;
-  int result, subdevice, card;
-  char name[64];
-  snd_ctl_t *handle;
-
-  // Count cards and devices
-  card = -1;
-  snd_card_next( &card );
-  while ( card >= 0 ) {
-    sprintf( name, "hw:%d", card );
-    result = snd_ctl_open( &handle, name, 0 );
-    if ( result < 0 ) {
-      errorStream_ << "RtApiAlsa::getDeviceCount: control open, card = " << card << ", " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-      goto nextcard;
-    }
-    subdevice = -1;
-    while( 1 ) {
-      result = snd_ctl_pcm_next_device( handle, &subdevice );
-      if ( result < 0 ) {
-        errorStream_ << "RtApiAlsa::getDeviceCount: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-        error( RtAudioError::WARNING );
-        break;
-      }
-      if ( subdevice < 0 )
-        break;
-      nDevices++;
-    }
-  nextcard:
-    snd_ctl_close( handle );
-    snd_card_next( &card );
-  }
-
-  result = snd_ctl_open( &handle, "default", 0 );
-  if (result == 0) {
-    nDevices++;
-    snd_ctl_close( handle );
-  }
-
-  return nDevices;
-}
-
-RtAudio::DeviceInfo RtApiAlsa :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  unsigned nDevices = 0;
-  int result, subdevice, card;
-  char name[64];
-  snd_ctl_t *chandle;
-
-  // Count cards and devices
-  card = -1;
-  subdevice = -1;
-  snd_card_next( &card );
-  while ( card >= 0 ) {
-    sprintf( name, "hw:%d", card );
-    result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
-    if ( result < 0 ) {
-      errorStream_ << "RtApiAlsa::getDeviceInfo: control open, card = " << card << ", " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-      goto nextcard;
-    }
-    subdevice = -1;
-    while( 1 ) {
-      result = snd_ctl_pcm_next_device( chandle, &subdevice );
-      if ( result < 0 ) {
-        errorStream_ << "RtApiAlsa::getDeviceInfo: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-        error( RtAudioError::WARNING );
-        break;
-      }
-      if ( subdevice < 0 ) break;
-      if ( nDevices == device ) {
-        sprintf( name, "hw:%d,%d", card, subdevice );
-        goto foundDevice;
-      }
-      nDevices++;
-    }
-  nextcard:
-    snd_ctl_close( chandle );
-    snd_card_next( &card );
-  }
-
-  result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
-  if ( result == 0 ) {
-    if ( nDevices == device ) {
-      strcpy( name, "default" );
-      goto foundDevice;
-    }
-    nDevices++;
-  }
-
-  if ( nDevices == 0 ) {
-    errorText_ = "RtApiAlsa::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    errorText_ = "RtApiAlsa::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
- foundDevice:
-
-  // If a stream is already open, we cannot probe the stream devices.
-  // Thus, use the saved results.
-  if ( stream_.state != STREAM_CLOSED &&
-       ( stream_.device[0] == device || stream_.device[1] == device ) ) {
-    snd_ctl_close( chandle );
-    if ( device >= devices_.size() ) {
-      errorText_ = "RtApiAlsa::getDeviceInfo: device ID was not present before stream was opened.";
-      error( RtAudioError::WARNING );
-      return info;
-    }
-    return devices_[ device ];
-  }
-
-  int openMode = SND_PCM_ASYNC;
-  snd_pcm_stream_t stream;
-  snd_pcm_info_t *pcminfo;
-  snd_pcm_info_alloca( &pcminfo );
-  snd_pcm_t *phandle;
-  snd_pcm_hw_params_t *params;
-  snd_pcm_hw_params_alloca( &params );
-
-  // First try for playback unless default device (which has subdev -1)
-  stream = SND_PCM_STREAM_PLAYBACK;
-  snd_pcm_info_set_stream( pcminfo, stream );
-  if ( subdevice != -1 ) {
-    snd_pcm_info_set_device( pcminfo, subdevice );
-    snd_pcm_info_set_subdevice( pcminfo, 0 );
-
-    result = snd_ctl_pcm_info( chandle, pcminfo );
-    if ( result < 0 ) {
-      // Device probably doesn't support playback.
-      goto captureProbe;
-    }
-  }
-
-  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK );
-  if ( result < 0 ) {
-    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    goto captureProbe;
-  }
-
-  // The device is open ... fill the parameter structure.
-  result = snd_pcm_hw_params_any( phandle, params );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    goto captureProbe;
-  }
-
-  // Get output channel information.
-  unsigned int value;
-  result = snd_pcm_hw_params_get_channels_max( params, &value );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") output channels, " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    goto captureProbe;
-  }
-  info.outputChannels = value;
-  snd_pcm_close( phandle );
-
- captureProbe:
-  stream = SND_PCM_STREAM_CAPTURE;
-  snd_pcm_info_set_stream( pcminfo, stream );
-
-  // Now try for capture unless default device (with subdev = -1)
-  if ( subdevice != -1 ) {
-    result = snd_ctl_pcm_info( chandle, pcminfo );
-    snd_ctl_close( chandle );
-    if ( result < 0 ) {
-      // Device probably doesn't support capture.
-      if ( info.outputChannels == 0 ) return info;
-      goto probeParameters;
-    }
-  }
-  else
-    snd_ctl_close( chandle );
-
-  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
-  if ( result < 0 ) {
-    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    if ( info.outputChannels == 0 ) return info;
-    goto probeParameters;
-  }
-
-  // The device is open ... fill the parameter structure.
-  result = snd_pcm_hw_params_any( phandle, params );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    if ( info.outputChannels == 0 ) return info;
-    goto probeParameters;
-  }
-
-  result = snd_pcm_hw_params_get_channels_max( params, &value );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") input channels, " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    if ( info.outputChannels == 0 ) return info;
-    goto probeParameters;
-  }
-  info.inputChannels = value;
-  snd_pcm_close( phandle );
-
-  // If device opens for both playback and capture, we determine the channels.
-  if ( info.outputChannels > 0 && info.inputChannels > 0 )
-    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
-  // ALSA doesn't provide default devices so we'll use the first available one.
-  if ( device == 0 && info.outputChannels > 0 )
-    info.isDefaultOutput = true;
-  if ( device == 0 && info.inputChannels > 0 )
-    info.isDefaultInput = true;
-
- probeParameters:
-  // At this point, we just need to figure out the supported data
-  // formats and sample rates.  We'll proceed by opening the device in
-  // the direction with the maximum number of channels, or playback if
-  // they are equal.  This might limit our sample rate options, but so
-  // be it.
-
-  if ( info.outputChannels >= info.inputChannels )
-    stream = SND_PCM_STREAM_PLAYBACK;
-  else
-    stream = SND_PCM_STREAM_CAPTURE;
-  snd_pcm_info_set_stream( pcminfo, stream );
-
-  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
-  if ( result < 0 ) {
-    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // The device is open ... fill the parameter structure.
-  result = snd_pcm_hw_params_any( phandle, params );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Test our discrete set of sample rate values.
-  info.sampleRates.clear();
-  for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
-    if ( snd_pcm_hw_params_test_rate( phandle, params, SAMPLE_RATES[i], 0 ) == 0 ) {
-      info.sampleRates.push_back( SAMPLE_RATES[i] );
-
-      if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
-        info.preferredSampleRate = SAMPLE_RATES[i];
-    }
-  }
-  if ( info.sampleRates.size() == 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::getDeviceInfo: no supported sample rates found for device (" << name << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Probe the supported data formats ... we don't care about endian-ness just yet
-  snd_pcm_format_t format;
-  info.nativeFormats = 0;
-  format = SND_PCM_FORMAT_S8;
-  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
-    info.nativeFormats |= RTAUDIO_SINT8;
-  format = SND_PCM_FORMAT_S16;
-  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
-    info.nativeFormats |= RTAUDIO_SINT16;
-  format = SND_PCM_FORMAT_S24;
-  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
-    info.nativeFormats |= RTAUDIO_SINT24;
-  format = SND_PCM_FORMAT_S32;
-  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
-    info.nativeFormats |= RTAUDIO_SINT32;
-  format = SND_PCM_FORMAT_FLOAT;
-  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
-    info.nativeFormats |= RTAUDIO_FLOAT32;
-  format = SND_PCM_FORMAT_FLOAT64;
-  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
-    info.nativeFormats |= RTAUDIO_FLOAT64;
-
-  // Check that we have at least one supported format
-  if ( info.nativeFormats == 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::getDeviceInfo: pcm device (" << name << ") data format not supported by RtAudio.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Get the device name
-  char *cardname;
-  result = snd_card_get_name( card, &cardname );
-  if ( result >= 0 ) {
-    sprintf( name, "hw:%s,%d", cardname, subdevice );
-    free( cardname );
-  }
-  info.name = name;
-
-  // That's all ... close the device and return
-  snd_pcm_close( phandle );
-  info.probed = true;
-  return info;
-}
-
-void RtApiAlsa :: saveDeviceInfo( void )
-{
-  devices_.clear();
-
-  unsigned int nDevices = getDeviceCount();
-  devices_.resize( nDevices );
-  for ( unsigned int i=0; i<nDevices; i++ )
-    devices_[i] = getDeviceInfo( i );
-}
-
-bool RtApiAlsa :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int *bufferSize,
-                                   RtAudio::StreamOptions *options )
-
-{
-#if defined(__RTAUDIO_DEBUG__)
-  snd_output_t *out;
-  snd_output_stdio_attach(&out, stderr, 0);
-#endif
-
-  // I'm not using the "plug" interface ... too much inconsistent behavior.
-
-  unsigned nDevices = 0;
-  int result, subdevice, card;
-  char name[64];
-  snd_ctl_t *chandle;
-
-  if ( options && options->flags & RTAUDIO_ALSA_USE_DEFAULT )
-    snprintf(name, sizeof(name), "%s", "default");
-  else {
-    // Count cards and devices
-    card = -1;
-    snd_card_next( &card );
-    while ( card >= 0 ) {
-      sprintf( name, "hw:%d", card );
-      result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
-      if ( result < 0 ) {
-        errorStream_ << "RtApiAlsa::probeDeviceOpen: control open, card = " << card << ", " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-        return FAILURE;
-      }
-      subdevice = -1;
-      while( 1 ) {
-        result = snd_ctl_pcm_next_device( chandle, &subdevice );
-        if ( result < 0 ) break;
-        if ( subdevice < 0 ) break;
-        if ( nDevices == device ) {
-          sprintf( name, "hw:%d,%d", card, subdevice );
-          snd_ctl_close( chandle );
-          goto foundDevice;
-        }
-        nDevices++;
-      }
-      snd_ctl_close( chandle );
-      snd_card_next( &card );
-    }
-
-    result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
-    if ( result == 0 ) {
-      if ( nDevices == device ) {
-        strcpy( name, "default" );
-        goto foundDevice;
-      }
-      nDevices++;
-    }
-
-    if ( nDevices == 0 ) {
-      // This should not happen because a check is made before this function is called.
-      errorText_ = "RtApiAlsa::probeDeviceOpen: no devices found!";
-      return FAILURE;
-    }
-
-    if ( device >= nDevices ) {
-      // This should not happen because a check is made before this function is called.
-      errorText_ = "RtApiAlsa::probeDeviceOpen: device ID is invalid!";
-      return FAILURE;
-    }
-  }
-
- foundDevice:
-
-  // The getDeviceInfo() function will not work for a device that is
-  // already open.  Thus, we'll probe the system before opening a
-  // stream and save the results for use by getDeviceInfo().
-  if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) // only do once
-    this->saveDeviceInfo();
-
-  snd_pcm_stream_t stream;
-  if ( mode == OUTPUT )
-    stream = SND_PCM_STREAM_PLAYBACK;
-  else
-    stream = SND_PCM_STREAM_CAPTURE;
-
-  snd_pcm_t *phandle;
-  int openMode = SND_PCM_ASYNC;
-  result = snd_pcm_open( &phandle, name, stream, openMode );
-  if ( result < 0 ) {
-    if ( mode == OUTPUT )
-      errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for output.";
-    else
-      errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for input.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Fill the parameter structure.
-  snd_pcm_hw_params_t *hw_params;
-  snd_pcm_hw_params_alloca( &hw_params );
-  result = snd_pcm_hw_params_any( phandle, hw_params );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") parameters, " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-#if defined(__RTAUDIO_DEBUG__)
-  fprintf( stderr, "\nRtApiAlsa: dump hardware params just after device open:\n\n" );
-  snd_pcm_hw_params_dump( hw_params, out );
-#endif
-
-  // Set access ... check user preference.
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) {
-    stream_.userInterleaved = false;
-    result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
-    if ( result < 0 ) {
-      result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
-      stream_.deviceInterleaved[mode] =  true;
-    }
-    else
-      stream_.deviceInterleaved[mode] = false;
-  }
-  else {
-    stream_.userInterleaved = true;
-    result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
-    if ( result < 0 ) {
-      result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
-      stream_.deviceInterleaved[mode] =  false;
-    }
-    else
-      stream_.deviceInterleaved[mode] =  true;
-  }
-
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") access, " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Determine how to set the device format.
-  stream_.userFormat = format;
-  snd_pcm_format_t deviceFormat = SND_PCM_FORMAT_UNKNOWN;
-
-  if ( format == RTAUDIO_SINT8 )
-    deviceFormat = SND_PCM_FORMAT_S8;
-  else if ( format == RTAUDIO_SINT16 )
-    deviceFormat = SND_PCM_FORMAT_S16;
-  else if ( format == RTAUDIO_SINT24 )
-    deviceFormat = SND_PCM_FORMAT_S24;
-  else if ( format == RTAUDIO_SINT32 )
-    deviceFormat = SND_PCM_FORMAT_S32;
-  else if ( format == RTAUDIO_FLOAT32 )
-    deviceFormat = SND_PCM_FORMAT_FLOAT;
-  else if ( format == RTAUDIO_FLOAT64 )
-    deviceFormat = SND_PCM_FORMAT_FLOAT64;
-
-  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat) == 0) {
-    stream_.deviceFormat[mode] = format;
-    goto setFormat;
-  }
-
-  // The user requested format is not natively supported by the device.
-  deviceFormat = SND_PCM_FORMAT_FLOAT64;
-  if ( snd_pcm_hw_params_test_format( phandle, hw_params, deviceFormat ) == 0 ) {
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
-    goto setFormat;
-  }
-
-  deviceFormat = SND_PCM_FORMAT_FLOAT;
-  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-    goto setFormat;
-  }
-
-  deviceFormat = SND_PCM_FORMAT_S32;
-  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT32;
-    goto setFormat;
-  }
-
-  deviceFormat = SND_PCM_FORMAT_S24;
-  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT24;
-    goto setFormat;
-  }
-
-  deviceFormat = SND_PCM_FORMAT_S16;
-  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-    goto setFormat;
-  }
-
-  deviceFormat = SND_PCM_FORMAT_S8;
-  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
-    stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-    goto setFormat;
-  }
-
-  // If we get here, no supported format was found.
-  snd_pcm_close( phandle );
-  errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device " << device << " data format not supported by RtAudio.";
-  errorText_ = errorStream_.str();
-  return FAILURE;
-
- setFormat:
-  result = snd_pcm_hw_params_set_format( phandle, hw_params, deviceFormat );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") data format, " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Determine whether byte-swaping is necessary.
-  stream_.doByteSwap[mode] = false;
-  if ( deviceFormat != SND_PCM_FORMAT_S8 ) {
-    result = snd_pcm_format_cpu_endian( deviceFormat );
-    if ( result == 0 )
-      stream_.doByteSwap[mode] = true;
-    else if (result < 0) {
-      snd_pcm_close( phandle );
-      errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") endian-ness, " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      return FAILURE;
-    }
-  }
-
-  // Set the sample rate.
-  result = snd_pcm_hw_params_set_rate_near( phandle, hw_params, (unsigned int*) &sampleRate, 0 );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting sample rate on device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Determine the number of channels for this device.  We support a possible
-  // minimum device channel number > than the value requested by the user.
-  stream_.nUserChannels[mode] = channels;
-  unsigned int value;
-  result = snd_pcm_hw_params_get_channels_max( hw_params, &value );
-  unsigned int deviceChannels = value;
-  if ( result < 0 || deviceChannels < channels + firstChannel ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: requested channel parameters not supported by device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  result = snd_pcm_hw_params_get_channels_min( hw_params, &value );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting minimum channels for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-  deviceChannels = value;
-  if ( deviceChannels < channels + firstChannel ) deviceChannels = channels + firstChannel;
-  stream_.nDeviceChannels[mode] = deviceChannels;
-
-  // Set the device channels.
-  result = snd_pcm_hw_params_set_channels( phandle, hw_params, deviceChannels );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting channels for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Set the buffer (or period) size.
-  int dir = 0;
-  snd_pcm_uframes_t periodSize = *bufferSize;
-  result = snd_pcm_hw_params_set_period_size_near( phandle, hw_params, &periodSize, &dir );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting period size for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-  *bufferSize = periodSize;
-
-  // Set the buffer number, which in ALSA is referred to as the "period".
-  unsigned int periods = 0;
-  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) periods = 2;
-  if ( options && options->numberOfBuffers > 0 ) periods = options->numberOfBuffers;
-  if ( periods < 2 ) periods = 4; // a fairly safe default value
-  result = snd_pcm_hw_params_set_periods_near( phandle, hw_params, &periods, &dir );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting periods for device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // If attempting to setup a duplex stream, the bufferSize parameter
-  // MUST be the same in both directions!
-  if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << name << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  stream_.bufferSize = *bufferSize;
-
-  // Install the hardware configuration
-  result = snd_pcm_hw_params( phandle, hw_params );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing hardware configuration on device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-#if defined(__RTAUDIO_DEBUG__)
-  fprintf(stderr, "\nRtApiAlsa: dump hardware params after installation:\n\n");
-  snd_pcm_hw_params_dump( hw_params, out );
-#endif
-
-  // Set the software configuration to fill buffers with zeros and prevent device stopping on xruns.
-  snd_pcm_sw_params_t *sw_params = NULL;
-  snd_pcm_sw_params_alloca( &sw_params );
-  snd_pcm_sw_params_current( phandle, sw_params );
-  snd_pcm_sw_params_set_start_threshold( phandle, sw_params, *bufferSize );
-  snd_pcm_sw_params_set_stop_threshold( phandle, sw_params, ULONG_MAX );
-  snd_pcm_sw_params_set_silence_threshold( phandle, sw_params, 0 );
-
-  // The following two settings were suggested by Theo Veenker
-  //snd_pcm_sw_params_set_avail_min( phandle, sw_params, *bufferSize );
-  //snd_pcm_sw_params_set_xfer_align( phandle, sw_params, 1 );
-
-  // here are two options for a fix
-  //snd_pcm_sw_params_set_silence_size( phandle, sw_params, ULONG_MAX );
-  snd_pcm_uframes_t val;
-  snd_pcm_sw_params_get_boundary( sw_params, &val );
-  snd_pcm_sw_params_set_silence_size( phandle, sw_params, val );
-
-  result = snd_pcm_sw_params( phandle, sw_params );
-  if ( result < 0 ) {
-    snd_pcm_close( phandle );
-    errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing software configuration on device (" << name << "), " << snd_strerror( result ) << ".";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-#if defined(__RTAUDIO_DEBUG__)
-  fprintf(stderr, "\nRtApiAlsa: dump software params after installation:\n\n");
-  snd_pcm_sw_params_dump( sw_params, out );
-#endif
-
-  // Set flags for buffer conversion
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate the ApiHandle if necessary and then save.
-  AlsaHandle *apiInfo = 0;
-  if ( stream_.apiHandle == 0 ) {
-    try {
-      apiInfo = (AlsaHandle *) new AlsaHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating AlsaHandle memory.";
-      goto error;
-    }
-
-    if ( pthread_cond_init( &apiInfo->runnable_cv, NULL ) ) {
-      errorText_ = "RtApiAlsa::probeDeviceOpen: error initializing pthread condition variable.";
-      goto error;
-    }
-
-    stream_.apiHandle = (void *) apiInfo;
-    apiInfo->handles[0] = 0;
-    apiInfo->handles[1] = 0;
-  }
-  else {
-    apiInfo = (AlsaHandle *) stream_.apiHandle;
-  }
-  apiInfo->handles[mode] = phandle;
-  phandle = 0;
-
-  // Allocate necessary internal buffers.
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  stream_.sampleRate = sampleRate;
-  stream_.nBuffers = periods;
-  stream_.device[mode] = device;
-  stream_.state = STREAM_STOPPED;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
-  // Setup thread if necessary.
-  if ( stream_.mode == OUTPUT && mode == INPUT ) {
-    // We had already set up an output stream.
-    stream_.mode = DUPLEX;
-    // Link the streams if possible.
-    apiInfo->synchronized = false;
-    if ( snd_pcm_link( apiInfo->handles[0], apiInfo->handles[1] ) == 0 )
-      apiInfo->synchronized = true;
-    else {
-      errorText_ = "RtApiAlsa::probeDeviceOpen: unable to synchronize input and output devices.";
-      error( RtAudioError::WARNING );
-    }
-  }
-  else {
-    stream_.mode = mode;
-
-    // Setup callback thread.
-    stream_.callbackInfo.object = (void *) this;
-
-    // Set the thread attributes for joinable and realtime scheduling
-    // priority (optional).  The higher priority will only take affect
-    // if the program is run as root or suid. Note, under Linux
-    // processes with CAP_SYS_NICE privilege, a user can change
-    // scheduling policy and priority (thus need not be root). See
-    // POSIX "capabilities".
-    pthread_attr_t attr;
-    pthread_attr_init( &attr );
-    pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
-
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
-    if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
-      // We previously attempted to increase the audio callback priority
-      // to SCHED_RR here via the attributes.  However, while no errors
-      // were reported in doing so, it did not work.  So, now this is
-      // done in the alsaCallbackHandler function.
-      stream_.callbackInfo.doRealtime = true;
-      int priority = options->priority;
-      int min = sched_get_priority_min( SCHED_RR );
-      int max = sched_get_priority_max( SCHED_RR );
-      if ( priority < min ) priority = min;
-      else if ( priority > max ) priority = max;
-      stream_.callbackInfo.priority = priority;
-    }
-#endif
-
-    stream_.callbackInfo.isRunning = true;
-    result = pthread_create( &stream_.callbackInfo.thread, &attr, alsaCallbackHandler, &stream_.callbackInfo );
-    pthread_attr_destroy( &attr );
-    if ( result ) {
-      stream_.callbackInfo.isRunning = false;
-      errorText_ = "RtApiAlsa::error creating callback thread!";
-      goto error;
-    }
-  }
-
-  return SUCCESS;
-
- error:
-  if ( apiInfo ) {
-    pthread_cond_destroy( &apiInfo->runnable_cv );
-    if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
-    if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
-    delete apiInfo;
-    stream_.apiHandle = 0;
-  }
-
-  if ( phandle) snd_pcm_close( phandle );
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  return FAILURE;
-}
-
-void RtApiAlsa :: closeStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiAlsa::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
-  stream_.callbackInfo.isRunning = false;
-  MUTEX_LOCK( &stream_.mutex );
-  if ( stream_.state == STREAM_STOPPED ) {
-    apiInfo->runnable = true;
-    pthread_cond_signal( &apiInfo->runnable_cv );
-  }
-  MUTEX_UNLOCK( &stream_.mutex );
-  pthread_join( stream_.callbackInfo.thread, NULL );
-
-  if ( stream_.state == STREAM_RUNNING ) {
-    stream_.state = STREAM_STOPPED;
-    if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
-      snd_pcm_drop( apiInfo->handles[0] );
-    if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
-      snd_pcm_drop( apiInfo->handles[1] );
-  }
-
-  if ( apiInfo ) {
-    pthread_cond_destroy( &apiInfo->runnable_cv );
-    if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
-    if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
-    delete apiInfo;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-void RtApiAlsa :: startStream()
-{
-  // This method calls snd_pcm_prepare if the device isn't already in that state.
-
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiAlsa::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  int result = 0;
-  snd_pcm_state_t state;
-  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
-  snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    state = snd_pcm_state( handle[0] );
-    if ( state != SND_PCM_STATE_PREPARED ) {
-      result = snd_pcm_prepare( handle[0] );
-      if ( result < 0 ) {
-        errorStream_ << "RtApiAlsa::startStream: error preparing output pcm device, " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-        goto unlock;
-      }
-    }
-  }
-
-  if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
-    result = snd_pcm_drop(handle[1]); // fix to remove stale data received since device has been open
-    state = snd_pcm_state( handle[1] );
-    if ( state != SND_PCM_STATE_PREPARED ) {
-      result = snd_pcm_prepare( handle[1] );
-      if ( result < 0 ) {
-        errorStream_ << "RtApiAlsa::startStream: error preparing input pcm device, " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-        goto unlock;
-      }
-    }
-  }
-
-  stream_.state = STREAM_RUNNING;
-
- unlock:
-  apiInfo->runnable = true;
-  pthread_cond_signal( &apiInfo->runnable_cv );
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( result >= 0 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiAlsa::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  int result = 0;
-  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
-  snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    if ( apiInfo->synchronized ) 
-      result = snd_pcm_drop( handle[0] );
-    else
-      result = snd_pcm_drain( handle[0] );
-    if ( result < 0 ) {
-      errorStream_ << "RtApiAlsa::stopStream: error draining output pcm device, " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
-    result = snd_pcm_drop( handle[1] );
-    if ( result < 0 ) {
-      errorStream_ << "RtApiAlsa::stopStream: error stopping input pcm device, " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
- unlock:
-  apiInfo->runnable = false; // fixes high CPU usage when stopped
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( result >= 0 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: abortStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiAlsa::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  int result = 0;
-  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
-  snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    result = snd_pcm_drop( handle[0] );
-    if ( result < 0 ) {
-      errorStream_ << "RtApiAlsa::abortStream: error aborting output pcm device, " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
-    result = snd_pcm_drop( handle[1] );
-    if ( result < 0 ) {
-      errorStream_ << "RtApiAlsa::abortStream: error aborting input pcm device, " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
- unlock:
-  apiInfo->runnable = false; // fixes high CPU usage when stopped
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( result >= 0 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: callbackEvent()
-{
-  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_LOCK( &stream_.mutex );
-    while ( !apiInfo->runnable )
-      pthread_cond_wait( &apiInfo->runnable_cv, &stream_.mutex );
-
-    if ( stream_.state != STREAM_RUNNING ) {
-      MUTEX_UNLOCK( &stream_.mutex );
-      return;
-    }
-    MUTEX_UNLOCK( &stream_.mutex );
-  }
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiAlsa::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  int doStopStream = 0;
-  RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
-  double streamTime = getStreamTime();
-  RtAudioStreamStatus status = 0;
-  if ( stream_.mode != INPUT && apiInfo->xrun[0] == true ) {
-    status |= RTAUDIO_OUTPUT_UNDERFLOW;
-    apiInfo->xrun[0] = false;
-  }
-  if ( stream_.mode != OUTPUT && apiInfo->xrun[1] == true ) {
-    status |= RTAUDIO_INPUT_OVERFLOW;
-    apiInfo->xrun[1] = false;
-  }
-  doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                           stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
-
-  if ( doStopStream == 2 ) {
-    abortStream();
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  // The state might change while waiting on a mutex.
-  if ( stream_.state == STREAM_STOPPED ) goto unlock;
-
-  int result;
-  char *buffer;
-  int channels;
-  snd_pcm_t **handle;
-  snd_pcm_sframes_t frames;
-  RtAudioFormat format;
-  handle = (snd_pcm_t **) apiInfo->handles;
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    // Setup parameters.
-    if ( stream_.doConvertBuffer[1] ) {
-      buffer = stream_.deviceBuffer;
-      channels = stream_.nDeviceChannels[1];
-      format = stream_.deviceFormat[1];
-    }
-    else {
-      buffer = stream_.userBuffer[1];
-      channels = stream_.nUserChannels[1];
-      format = stream_.userFormat;
-    }
-
-    // Read samples from device in interleaved/non-interleaved format.
-    if ( stream_.deviceInterleaved[1] )
-      result = snd_pcm_readi( handle[1], buffer, stream_.bufferSize );
-    else {
-      void *bufs[channels];
-      size_t offset = stream_.bufferSize * formatBytes( format );
-      for ( int i=0; i<channels; i++ )
-        bufs[i] = (void *) (buffer + (i * offset));
-      result = snd_pcm_readn( handle[1], bufs, stream_.bufferSize );
-    }
-
-    if ( result < (int) stream_.bufferSize ) {
-      // Either an error or overrun occured.
-      if ( result == -EPIPE ) {
-        snd_pcm_state_t state = snd_pcm_state( handle[1] );
-        if ( state == SND_PCM_STATE_XRUN ) {
-          apiInfo->xrun[1] = true;
-          result = snd_pcm_prepare( handle[1] );
-          if ( result < 0 ) {
-            errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after overrun, " << snd_strerror( result ) << ".";
-            errorText_ = errorStream_.str();
-          }
-        }
-        else {
-          errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
-          errorText_ = errorStream_.str();
-        }
-      }
-      else {
-        errorStream_ << "RtApiAlsa::callbackEvent: audio read error, " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-      }
-      error( RtAudioError::WARNING );
-      goto tryOutput;
-    }
-
-    // Do byte swapping if necessary.
-    if ( stream_.doByteSwap[1] )
-      byteSwapBuffer( buffer, stream_.bufferSize * channels, format );
-
-    // Do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[1] )
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-
-    // Check stream latency
-    result = snd_pcm_delay( handle[1], &frames );
-    if ( result == 0 && frames > 0 ) stream_.latency[1] = frames;
-  }
-
- tryOutput:
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    // Setup parameters and do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[0] ) {
-      buffer = stream_.deviceBuffer;
-      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-      channels = stream_.nDeviceChannels[0];
-      format = stream_.deviceFormat[0];
-    }
-    else {
-      buffer = stream_.userBuffer[0];
-      channels = stream_.nUserChannels[0];
-      format = stream_.userFormat;
-    }
-
-    // Do byte swapping if necessary.
-    if ( stream_.doByteSwap[0] )
-      byteSwapBuffer(buffer, stream_.bufferSize * channels, format);
-
-    // Write samples to device in interleaved/non-interleaved format.
-    if ( stream_.deviceInterleaved[0] )
-      result = snd_pcm_writei( handle[0], buffer, stream_.bufferSize );
-    else {
-      void *bufs[channels];
-      size_t offset = stream_.bufferSize * formatBytes( format );
-      for ( int i=0; i<channels; i++ )
-        bufs[i] = (void *) (buffer + (i * offset));
-      result = snd_pcm_writen( handle[0], bufs, stream_.bufferSize );
-    }
-
-    if ( result < (int) stream_.bufferSize ) {
-      // Either an error or underrun occured.
-      if ( result == -EPIPE ) {
-        snd_pcm_state_t state = snd_pcm_state( handle[0] );
-        if ( state == SND_PCM_STATE_XRUN ) {
-          apiInfo->xrun[0] = true;
-          result = snd_pcm_prepare( handle[0] );
-          if ( result < 0 ) {
-            errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after underrun, " << snd_strerror( result ) << ".";
-            errorText_ = errorStream_.str();
-          }
-          else
-            errorText_ =  "RtApiAlsa::callbackEvent: audio write error, underrun.";
-        }
-        else {
-          errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
-          errorText_ = errorStream_.str();
-        }
-      }
-      else {
-        errorStream_ << "RtApiAlsa::callbackEvent: audio write error, " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-      }
-      error( RtAudioError::WARNING );
-      goto unlock;
-    }
-
-    // Check stream latency
-    result = snd_pcm_delay( handle[0], &frames );
-    if ( result == 0 && frames > 0 ) stream_.latency[0] = frames;
-  }
-
- unlock:
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  RtApi::tickStreamTime();
-  if ( doStopStream == 1 ) this->stopStream();
-}
-
-static void *alsaCallbackHandler( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiAlsa *object = (RtApiAlsa *) info->object;
-  bool *isRunning = &info->isRunning;
-
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
-  if ( &info->doRealtime ) {
-    pthread_t tID = pthread_self();	 // ID of this thread
-    sched_param prio = { info->priority }; // scheduling priority of thread
-    pthread_setschedparam( tID, SCHED_RR, &prio );
-  }
-#endif
-
-  while ( *isRunning == true ) {
-    pthread_testcancel();
-    object->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-//******************** End of __LINUX_ALSA__ *********************//
-#endif
-
-#if defined(__LINUX_PULSE__)
-
-// Code written by Peter Meerwald, pmeerw@pmeerw.net
-// and Tristan Matthews.
-
-#include <pulse/error.h>
-#include <pulse/simple.h>
-#include <cstdio>
-
-static const unsigned int SUPPORTED_SAMPLERATES[] = { 8000, 16000, 22050, 32000,
-                                                      44100, 48000, 96000, 0};
-
-struct rtaudio_pa_format_mapping_t {
-  RtAudioFormat rtaudio_format;
-  pa_sample_format_t pa_format;
-};
-
-static const rtaudio_pa_format_mapping_t supported_sampleformats[] = {
-  {RTAUDIO_SINT16, PA_SAMPLE_S16LE},
-  {RTAUDIO_SINT32, PA_SAMPLE_S32LE},
-  {RTAUDIO_FLOAT32, PA_SAMPLE_FLOAT32LE},
-  {0, PA_SAMPLE_INVALID}};
-
-struct PulseAudioHandle {
-  pa_simple *s_play;
-  pa_simple *s_rec;
-  pthread_t thread;
-  pthread_cond_t runnable_cv;
-  bool runnable;
-  PulseAudioHandle() : s_play(0), s_rec(0), runnable(false) { }
-};
-
-RtApiPulse::~RtApiPulse()
-{
-  if ( stream_.state != STREAM_CLOSED )
-    closeStream();
-}
-
-unsigned int RtApiPulse::getDeviceCount( void )
-{
-  return 1;
-}
-
-RtAudio::DeviceInfo RtApiPulse::getDeviceInfo( unsigned int /*device*/ )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = true;
-  info.name = "PulseAudio";
-  info.outputChannels = 2;
-  info.inputChannels = 2;
-  info.duplexChannels = 2;
-  info.isDefaultOutput = true;
-  info.isDefaultInput = true;
-
-  for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr )
-    info.sampleRates.push_back( *sr );
-
-  info.preferredSampleRate = 48000;
-  info.nativeFormats = RTAUDIO_SINT16 | RTAUDIO_SINT32 | RTAUDIO_FLOAT32;
-
-  return info;
-}
-
-static void *pulseaudio_callback( void * user )
-{
-  CallbackInfo *cbi = static_cast<CallbackInfo *>( user );
-  RtApiPulse *context = static_cast<RtApiPulse *>( cbi->object );
-  volatile bool *isRunning = &cbi->isRunning;
-
-  while ( *isRunning ) {
-    pthread_testcancel();
-    context->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-void RtApiPulse::closeStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  stream_.callbackInfo.isRunning = false;
-  if ( pah ) {
-    MUTEX_LOCK( &stream_.mutex );
-    if ( stream_.state == STREAM_STOPPED ) {
-      pah->runnable = true;
-      pthread_cond_signal( &pah->runnable_cv );
-    }
-    MUTEX_UNLOCK( &stream_.mutex );
-
-    pthread_join( pah->thread, 0 );
-    if ( pah->s_play ) {
-      pa_simple_flush( pah->s_play, NULL );
-      pa_simple_free( pah->s_play );
-    }
-    if ( pah->s_rec )
-      pa_simple_free( pah->s_rec );
-
-    pthread_cond_destroy( &pah->runnable_cv );
-    delete pah;
-    stream_.apiHandle = 0;
-  }
-
-  if ( stream_.userBuffer[0] ) {
-    free( stream_.userBuffer[0] );
-    stream_.userBuffer[0] = 0;
-  }
-  if ( stream_.userBuffer[1] ) {
-    free( stream_.userBuffer[1] );
-    stream_.userBuffer[1] = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  stream_.mode = UNINITIALIZED;
-}
-
-void RtApiPulse::callbackEvent( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_LOCK( &stream_.mutex );
-    while ( !pah->runnable )
-      pthread_cond_wait( &pah->runnable_cv, &stream_.mutex );
-
-    if ( stream_.state != STREAM_RUNNING ) {
-      MUTEX_UNLOCK( &stream_.mutex );
-      return;
-    }
-    MUTEX_UNLOCK( &stream_.mutex );
-  }
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::callbackEvent(): the stream is closed ... "
-      "this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
-  double streamTime = getStreamTime();
-  RtAudioStreamStatus status = 0;
-  int doStopStream = callback( stream_.userBuffer[OUTPUT], stream_.userBuffer[INPUT],
-                               stream_.bufferSize, streamTime, status,
-                               stream_.callbackInfo.userData );
-
-  if ( doStopStream == 2 ) {
-    abortStream();
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-  void *pulse_in = stream_.doConvertBuffer[INPUT] ? stream_.deviceBuffer : stream_.userBuffer[INPUT];
-  void *pulse_out = stream_.doConvertBuffer[OUTPUT] ? stream_.deviceBuffer : stream_.userBuffer[OUTPUT];
-
-  if ( stream_.state != STREAM_RUNNING )
-    goto unlock;
-
-  int pa_error;
-  size_t bytes;
-  if (stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    if ( stream_.doConvertBuffer[OUTPUT] ) {
-        convertBuffer( stream_.deviceBuffer,
-                       stream_.userBuffer[OUTPUT],
-                       stream_.convertInfo[OUTPUT] );
-        bytes = stream_.nDeviceChannels[OUTPUT] * stream_.bufferSize *
-                formatBytes( stream_.deviceFormat[OUTPUT] );
-    } else
-        bytes = stream_.nUserChannels[OUTPUT] * stream_.bufferSize *
-                formatBytes( stream_.userFormat );
-
-    if ( pa_simple_write( pah->s_play, pulse_out, bytes, &pa_error ) < 0 ) {
-      errorStream_ << "RtApiPulse::callbackEvent: audio write error, " <<
-        pa_strerror( pa_error ) << ".";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-    }
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX) {
-    if ( stream_.doConvertBuffer[INPUT] )
-      bytes = stream_.nDeviceChannels[INPUT] * stream_.bufferSize *
-        formatBytes( stream_.deviceFormat[INPUT] );
-    else
-      bytes = stream_.nUserChannels[INPUT] * stream_.bufferSize *
-        formatBytes( stream_.userFormat );
-            
-    if ( pa_simple_read( pah->s_rec, pulse_in, bytes, &pa_error ) < 0 ) {
-      errorStream_ << "RtApiPulse::callbackEvent: audio read error, " <<
-        pa_strerror( pa_error ) << ".";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-    }
-    if ( stream_.doConvertBuffer[INPUT] ) {
-      convertBuffer( stream_.userBuffer[INPUT],
-                     stream_.deviceBuffer,
-                     stream_.convertInfo[INPUT] );
-    }
-  }
-
- unlock:
-  MUTEX_UNLOCK( &stream_.mutex );
-  RtApi::tickStreamTime();
-
-  if ( doStopStream == 1 )
-    stopStream();
-}
-
-void RtApiPulse::startStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::startStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiPulse::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  stream_.state = STREAM_RUNNING;
-
-  pah->runnable = true;
-  pthread_cond_signal( &pah->runnable_cv );
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-void RtApiPulse::stopStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::stopStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiPulse::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  if ( pah && pah->s_play ) {
-    int pa_error;
-    if ( pa_simple_drain( pah->s_play, &pa_error ) < 0 ) {
-      errorStream_ << "RtApiPulse::stopStream: error draining output device, " <<
-        pa_strerror( pa_error ) << ".";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-void RtApiPulse::abortStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle*>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::abortStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiPulse::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  if ( pah && pah->s_play ) {
-    int pa_error;
-    if ( pa_simple_flush( pah->s_play, &pa_error ) < 0 ) {
-      errorStream_ << "RtApiPulse::abortStream: error flushing output device, " <<
-        pa_strerror( pa_error ) << ".";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-bool RtApiPulse::probeDeviceOpen( unsigned int device, StreamMode mode,
-                                  unsigned int channels, unsigned int firstChannel,
-                                  unsigned int sampleRate, RtAudioFormat format,
-                                  unsigned int *bufferSize, RtAudio::StreamOptions *options )
-{
-  PulseAudioHandle *pah = 0;
-  unsigned long bufferBytes = 0;
-  pa_sample_spec ss;
-
-  if ( device != 0 ) return false;
-  if ( mode != INPUT && mode != OUTPUT ) return false;
-  if ( channels != 1 && channels != 2 ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported number of channels.";
-    return false;
-  }
-  ss.channels = channels;
-
-  if ( firstChannel != 0 ) return false;
-
-  bool sr_found = false;
-  for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr ) {
-    if ( sampleRate == *sr ) {
-      sr_found = true;
-      stream_.sampleRate = sampleRate;
-      ss.rate = sampleRate;
-      break;
-    }
-  }
-  if ( !sr_found ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported sample rate.";
-    return false;
-  }
-
-  bool sf_found = 0;
-  for ( const rtaudio_pa_format_mapping_t *sf = supported_sampleformats;
-        sf->rtaudio_format && sf->pa_format != PA_SAMPLE_INVALID; ++sf ) {
-    if ( format == sf->rtaudio_format ) {
-      sf_found = true;
-      stream_.userFormat = sf->rtaudio_format;
-      stream_.deviceFormat[mode] = stream_.userFormat;
-      ss.format = sf->pa_format;
-      break;
-    }
-  }
-  if ( !sf_found ) { // Use internal data format conversion.
-    stream_.userFormat = format;
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-    ss.format = PA_SAMPLE_FLOAT32LE;
-  }
-
-  // Set other stream parameters.
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] = true;
-  stream_.nBuffers = 1;
-  stream_.doByteSwap[mode] = false;
-  stream_.nUserChannels[mode] = channels;
-  stream_.nDeviceChannels[mode] = channels + firstChannel;
-  stream_.channelOffset[mode] = 0;
-  std::string streamName = "RtAudio";
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate necessary internal buffers.
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-  stream_.bufferSize = *bufferSize;
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiPulse::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  stream_.device[mode] = device;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
-  if ( !stream_.apiHandle ) {
-    PulseAudioHandle *pah = new PulseAudioHandle;
-    if ( !pah ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error allocating memory for handle.";
-      goto error;
-    }
-
-    stream_.apiHandle = pah;
-    if ( pthread_cond_init( &pah->runnable_cv, NULL ) != 0 ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error creating condition variable.";
-      goto error;
-    }
-  }
-  pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  int error;
-  if ( options && !options->streamName.empty() ) streamName = options->streamName;
-  switch ( mode ) {
-  case INPUT:
-    pa_buffer_attr buffer_attr;
-    buffer_attr.fragsize = bufferBytes;
-    buffer_attr.maxlength = -1;
-
-    pah->s_rec = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_RECORD, NULL, "Record", &ss, NULL, &buffer_attr, &error );
-    if ( !pah->s_rec ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error connecting input to PulseAudio server.";
-      goto error;
-    }
-    break;
-  case OUTPUT:
-    pah->s_play = pa_simple_new( NULL, "RtAudio", PA_STREAM_PLAYBACK, NULL, "Playback", &ss, NULL, NULL, &error );
-    if ( !pah->s_play ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error connecting output to PulseAudio server.";
-      goto error;
-    }
-    break;
-  default:
-    goto error;
-  }
-
-  if ( stream_.mode == UNINITIALIZED )
-    stream_.mode = mode;
-  else if ( stream_.mode == mode )
-    goto error;
-  else
-    stream_.mode = DUPLEX;
-
-  if ( !stream_.callbackInfo.isRunning ) {
-    stream_.callbackInfo.object = this;
-    stream_.callbackInfo.isRunning = true;
-    if ( pthread_create( &pah->thread, NULL, pulseaudio_callback, (void *)&stream_.callbackInfo) != 0 ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error creating thread.";
-      goto error;
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  return true;
- 
- error:
-  if ( pah && stream_.callbackInfo.isRunning ) {
-    pthread_cond_destroy( &pah->runnable_cv );
-    delete pah;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  return FAILURE;
-}
-
-//******************** End of __LINUX_PULSE__ *********************//
-#endif
-
-#if defined(__LINUX_OSS__)
-
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/soundcard.h>
-#include <errno.h>
-#include <math.h>
-
-static void *ossCallbackHandler(void * ptr);
-
-// A structure to hold various information related to the OSS API
-// implementation.
-struct OssHandle {
-  int id[2];    // device ids
-  bool xrun[2];
-  bool triggered;
-  pthread_cond_t runnable;
-
-  OssHandle()
-    :triggered(false) { id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; }
-};
-
-RtApiOss :: RtApiOss()
-{
-  // Nothing to do here.
-}
-
-RtApiOss :: ~RtApiOss()
-{
-  if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-unsigned int RtApiOss :: getDeviceCount( void )
-{
-  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
-  if ( mixerfd == -1 ) {
-    errorText_ = "RtApiOss::getDeviceCount: error opening '/dev/mixer'.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  oss_sysinfo sysinfo;
-  if ( ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo ) == -1 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceCount: error getting sysinfo, OSS version >= 4.0 is required.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  close( mixerfd );
-  return sysinfo.numaudios;
-}
-
-RtAudio::DeviceInfo RtApiOss :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
-  if ( mixerfd == -1 ) {
-    errorText_ = "RtApiOss::getDeviceInfo: error opening '/dev/mixer'.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  oss_sysinfo sysinfo;
-  int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
-  if ( result == -1 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: error getting sysinfo, OSS version >= 4.0 is required.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  unsigned nDevices = sysinfo.numaudios;
-  if ( nDevices == 0 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  oss_audioinfo ainfo;
-  ainfo.dev = device;
-  result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
-  close( mixerfd );
-  if ( result == -1 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Probe channels
-  if ( ainfo.caps & PCM_CAP_OUTPUT ) info.outputChannels = ainfo.max_channels;
-  if ( ainfo.caps & PCM_CAP_INPUT ) info.inputChannels = ainfo.max_channels;
-  if ( ainfo.caps & PCM_CAP_DUPLEX ) {
-    if ( info.outputChannels > 0 && info.inputChannels > 0 && ainfo.caps & PCM_CAP_DUPLEX )
-      info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-  }
-
-  // Probe data formats ... do for input
-  unsigned long mask = ainfo.iformats;
-  if ( mask & AFMT_S16_LE || mask & AFMT_S16_BE )
-    info.nativeFormats |= RTAUDIO_SINT16;
-  if ( mask & AFMT_S8 )
-    info.nativeFormats |= RTAUDIO_SINT8;
-  if ( mask & AFMT_S32_LE || mask & AFMT_S32_BE )
-    info.nativeFormats |= RTAUDIO_SINT32;
-  if ( mask & AFMT_FLOAT )
-    info.nativeFormats |= RTAUDIO_FLOAT32;
-  if ( mask & AFMT_S24_LE || mask & AFMT_S24_BE )
-    info.nativeFormats |= RTAUDIO_SINT24;
-
-  // Check that we have at least one supported format
-  if ( info.nativeFormats == 0 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: device (" << ainfo.name << ") data format not supported by RtAudio.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Probe the supported sample rates.
-  info.sampleRates.clear();
-  if ( ainfo.nrates ) {
-    for ( unsigned int i=0; i<ainfo.nrates; i++ ) {
-      for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-        if ( ainfo.rates[i] == SAMPLE_RATES[k] ) {
-          info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-          if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-            info.preferredSampleRate = SAMPLE_RATES[k];
-
-          break;
-        }
-      }
-    }
-  }
-  else {
-    // Check min and max rate values;
-    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-      if ( ainfo.min_rate <= (int) SAMPLE_RATES[k] && ainfo.max_rate >= (int) SAMPLE_RATES[k] ) {
-        info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-          info.preferredSampleRate = SAMPLE_RATES[k];
-      }
-    }
-  }
-
-  if ( info.sampleRates.size() == 0 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: no supported sample rates found for device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-  }
-  else {
-    info.probed = true;
-    info.name = ainfo.name;
-  }
-
-  return info;
-}
-
-
-bool RtApiOss :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                  unsigned int firstChannel, unsigned int sampleRate,
-                                  RtAudioFormat format, unsigned int *bufferSize,
-                                  RtAudio::StreamOptions *options )
-{
-  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
-  if ( mixerfd == -1 ) {
-    errorText_ = "RtApiOss::probeDeviceOpen: error opening '/dev/mixer'.";
-    return FAILURE;
-  }
-
-  oss_sysinfo sysinfo;
-  int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
-  if ( result == -1 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::probeDeviceOpen: error getting sysinfo, OSS version >= 4.0 is required.";
-    return FAILURE;
-  }
-
-  unsigned nDevices = sysinfo.numaudios;
-  if ( nDevices == 0 ) {
-    // This should not happen because a check is made before this function is called.
-    close( mixerfd );
-    errorText_ = "RtApiOss::probeDeviceOpen: no devices found!";
-    return FAILURE;
-  }
-
-  if ( device >= nDevices ) {
-    // This should not happen because a check is made before this function is called.
-    close( mixerfd );
-    errorText_ = "RtApiOss::probeDeviceOpen: device ID is invalid!";
-    return FAILURE;
-  }
-
-  oss_audioinfo ainfo;
-  ainfo.dev = device;
-  result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
-  close( mixerfd );
-  if ( result == -1 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Check if device supports input or output
-  if ( ( mode == OUTPUT && !( ainfo.caps & PCM_CAP_OUTPUT ) ) ||
-       ( mode == INPUT && !( ainfo.caps & PCM_CAP_INPUT ) ) ) {
-    if ( mode == OUTPUT )
-      errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support output.";
-    else
-      errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support input.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  int flags = 0;
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  if ( mode == OUTPUT )
-    flags |= O_WRONLY;
-  else { // mode == INPUT
-    if (stream_.mode == OUTPUT && stream_.device[0] == device) {
-      // We just set the same device for playback ... close and reopen for duplex (OSS only).
-      close( handle->id[0] );
-      handle->id[0] = 0;
-      if ( !( ainfo.caps & PCM_CAP_DUPLEX ) ) {
-        errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support duplex mode.";
-        errorText_ = errorStream_.str();
-        return FAILURE;
-      }
-      // Check that the number previously set channels is the same.
-      if ( stream_.nUserChannels[0] != channels ) {
-        errorStream_ << "RtApiOss::probeDeviceOpen: input/output channels must be equal for OSS duplex device (" << ainfo.name << ").";
-        errorText_ = errorStream_.str();
-        return FAILURE;
-      }
-      flags |= O_RDWR;
-    }
-    else
-      flags |= O_RDONLY;
-  }
-
-  // Set exclusive access if specified.
-  if ( options && options->flags & RTAUDIO_HOG_DEVICE ) flags |= O_EXCL;
-
-  // Try to open the device.
-  int fd;
-  fd = open( ainfo.devnode, flags, 0 );
-  if ( fd == -1 ) {
-    if ( errno == EBUSY )
-      errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") is busy.";
-    else
-      errorStream_ << "RtApiOss::probeDeviceOpen: error opening device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // For duplex operation, specifically set this mode (this doesn't seem to work).
-  /*
-    if ( flags | O_RDWR ) {
-    result = ioctl( fd, SNDCTL_DSP_SETDUPLEX, NULL );
-    if ( result == -1) {
-    errorStream_ << "RtApiOss::probeDeviceOpen: error setting duplex mode for device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-    }
-    }
-  */
-
-  // Check the device channel support.
-  stream_.nUserChannels[mode] = channels;
-  if ( ainfo.max_channels < (int)(channels + firstChannel) ) {
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: the device (" << ainfo.name << ") does not support requested channel parameters.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Set the number of channels.
-  int deviceChannels = channels + firstChannel;
-  result = ioctl( fd, SNDCTL_DSP_CHANNELS, &deviceChannels );
-  if ( result == -1 || deviceChannels < (int)(channels + firstChannel) ) {
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: error setting channel parameters on device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-  stream_.nDeviceChannels[mode] = deviceChannels;
-
-  // Get the data format mask
-  int mask;
-  result = ioctl( fd, SNDCTL_DSP_GETFMTS, &mask );
-  if ( result == -1 ) {
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: error getting device (" << ainfo.name << ") data formats.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Determine how to set the device format.
-  stream_.userFormat = format;
-  int deviceFormat = -1;
-  stream_.doByteSwap[mode] = false;
-  if ( format == RTAUDIO_SINT8 ) {
-    if ( mask & AFMT_S8 ) {
-      deviceFormat = AFMT_S8;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-    }
-  }
-  else if ( format == RTAUDIO_SINT16 ) {
-    if ( mask & AFMT_S16_NE ) {
-      deviceFormat = AFMT_S16_NE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-    }
-    else if ( mask & AFMT_S16_OE ) {
-      deviceFormat = AFMT_S16_OE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-      stream_.doByteSwap[mode] = true;
-    }
-  }
-  else if ( format == RTAUDIO_SINT24 ) {
-    if ( mask & AFMT_S24_NE ) {
-      deviceFormat = AFMT_S24_NE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
-    }
-    else if ( mask & AFMT_S24_OE ) {
-      deviceFormat = AFMT_S24_OE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
-      stream_.doByteSwap[mode] = true;
-    }
-  }
-  else if ( format == RTAUDIO_SINT32 ) {
-    if ( mask & AFMT_S32_NE ) {
-      deviceFormat = AFMT_S32_NE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
-    }
-    else if ( mask & AFMT_S32_OE ) {
-      deviceFormat = AFMT_S32_OE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
-      stream_.doByteSwap[mode] = true;
-    }
-  }
-
-  if ( deviceFormat == -1 ) {
-    // The user requested format is not natively supported by the device.
-    if ( mask & AFMT_S16_NE ) {
-      deviceFormat = AFMT_S16_NE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-    }
-    else if ( mask & AFMT_S32_NE ) {
-      deviceFormat = AFMT_S32_NE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
-    }
-    else if ( mask & AFMT_S24_NE ) {
-      deviceFormat = AFMT_S24_NE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
-    }
-    else if ( mask & AFMT_S16_OE ) {
-      deviceFormat = AFMT_S16_OE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
-      stream_.doByteSwap[mode] = true;
-    }
-    else if ( mask & AFMT_S32_OE ) {
-      deviceFormat = AFMT_S32_OE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
-      stream_.doByteSwap[mode] = true;
-    }
-    else if ( mask & AFMT_S24_OE ) {
-      deviceFormat = AFMT_S24_OE;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
-      stream_.doByteSwap[mode] = true;
-    }
-    else if ( mask & AFMT_S8) {
-      deviceFormat = AFMT_S8;
-      stream_.deviceFormat[mode] = RTAUDIO_SINT8;
-    }
-  }
-
-  if ( stream_.deviceFormat[mode] == 0 ) {
-    // This really shouldn't happen ...
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") data format not supported by RtAudio.";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Set the data format.
-  int temp = deviceFormat;
-  result = ioctl( fd, SNDCTL_DSP_SETFMT, &deviceFormat );
-  if ( result == -1 || deviceFormat != temp ) {
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: error setting data format on device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Attempt to set the buffer size.  According to OSS, the minimum
-  // number of buffers is two.  The supposed minimum buffer size is 16
-  // bytes, so that will be our lower bound.  The argument to this
-  // call is in the form 0xMMMMSSSS (hex), where the buffer size (in
-  // bytes) is given as 2^SSSS and the number of buffers as 2^MMMM.
-  // We'll check the actual value used near the end of the setup
-  // procedure.
-  int ossBufferBytes = *bufferSize * formatBytes( stream_.deviceFormat[mode] ) * deviceChannels;
-  if ( ossBufferBytes < 16 ) ossBufferBytes = 16;
-  int buffers = 0;
-  if ( options ) buffers = options->numberOfBuffers;
-  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) buffers = 2;
-  if ( buffers < 2 ) buffers = 3;
-  temp = ((int) buffers << 16) + (int)( log10( (double)ossBufferBytes ) / log10( 2.0 ) );
-  result = ioctl( fd, SNDCTL_DSP_SETFRAGMENT, &temp );
-  if ( result == -1 ) {
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: error setting buffer size on device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-  stream_.nBuffers = buffers;
-
-  // Save buffer size (in sample frames).
-  *bufferSize = ossBufferBytes / ( formatBytes(stream_.deviceFormat[mode]) * deviceChannels );
-  stream_.bufferSize = *bufferSize;
-
-  // Set the sample rate.
-  int srate = sampleRate;
-  result = ioctl( fd, SNDCTL_DSP_SPEED, &srate );
-  if ( result == -1 ) {
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: error setting sample rate (" << sampleRate << ") on device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-
-  // Verify the sample rate setup worked.
-  if ( abs( srate - sampleRate ) > 100 ) {
-    close( fd );
-    errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support sample rate (" << sampleRate << ").";
-    errorText_ = errorStream_.str();
-    return FAILURE;
-  }
-  stream_.sampleRate = sampleRate;
-
-  if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device) {
-    // We're doing duplex setup here.
-    stream_.deviceFormat[0] = stream_.deviceFormat[1];
-    stream_.nDeviceChannels[0] = deviceChannels;
-  }
-
-  // Set interleaving parameters.
-  stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] =  true;
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
-    stream_.userInterleaved = false;
-
-  // Set flags for buffer conversion
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-       stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate the stream handles if necessary and then save.
-  if ( stream_.apiHandle == 0 ) {
-    try {
-      handle = new OssHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      errorText_ = "RtApiOss::probeDeviceOpen: error allocating OssHandle memory.";
-      goto error;
-    }
-
-    if ( pthread_cond_init( &handle->runnable, NULL ) ) {
-      errorText_ = "RtApiOss::probeDeviceOpen: error initializing pthread condition variable.";
-      goto error;
-    }
-
-    stream_.apiHandle = (void *) handle;
-  }
-  else {
-    handle = (OssHandle *) stream_.apiHandle;
-  }
-  handle->id[mode] = fd;
-
-  // Allocate necessary internal buffers.
-  unsigned long bufferBytes;
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiOss::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiOss::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  stream_.device[mode] = device;
-  stream_.state = STREAM_STOPPED;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
-  // Setup thread if necessary.
-  if ( stream_.mode == OUTPUT && mode == INPUT ) {
-    // We had already set up an output stream.
-    stream_.mode = DUPLEX;
-    if ( stream_.device[0] == device ) handle->id[0] = fd;
-  }
-  else {
-    stream_.mode = mode;
-
-    // Setup callback thread.
-    stream_.callbackInfo.object = (void *) this;
-
-    // Set the thread attributes for joinable and realtime scheduling
-    // priority.  The higher priority will only take affect if the
-    // program is run as root or suid.
-    pthread_attr_t attr;
-    pthread_attr_init( &attr );
-    pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
-    if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
-      struct sched_param param;
-      int priority = options->priority;
-      int min = sched_get_priority_min( SCHED_RR );
-      int max = sched_get_priority_max( SCHED_RR );
-      if ( priority < min ) priority = min;
-      else if ( priority > max ) priority = max;
-      param.sched_priority = priority;
-      pthread_attr_setschedparam( &attr, &param );
-      pthread_attr_setschedpolicy( &attr, SCHED_RR );
-    }
-    else
-      pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
-#else
-    pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
-#endif
-
-    stream_.callbackInfo.isRunning = true;
-    result = pthread_create( &stream_.callbackInfo.thread, &attr, ossCallbackHandler, &stream_.callbackInfo );
-    pthread_attr_destroy( &attr );
-    if ( result ) {
-      stream_.callbackInfo.isRunning = false;
-      errorText_ = "RtApiOss::error creating callback thread!";
-      goto error;
-    }
-  }
-
-  return SUCCESS;
-
- error:
-  if ( handle ) {
-    pthread_cond_destroy( &handle->runnable );
-    if ( handle->id[0] ) close( handle->id[0] );
-    if ( handle->id[1] ) close( handle->id[1] );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  return FAILURE;
-}
-
-void RtApiOss :: closeStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiOss::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  stream_.callbackInfo.isRunning = false;
-  MUTEX_LOCK( &stream_.mutex );
-  if ( stream_.state == STREAM_STOPPED )
-    pthread_cond_signal( &handle->runnable );
-  MUTEX_UNLOCK( &stream_.mutex );
-  pthread_join( stream_.callbackInfo.thread, NULL );
-
-  if ( stream_.state == STREAM_RUNNING ) {
-    if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
-      ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
-    else
-      ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
-    stream_.state = STREAM_STOPPED;
-  }
-
-  if ( handle ) {
-    pthread_cond_destroy( &handle->runnable );
-    if ( handle->id[0] ) close( handle->id[0] );
-    if ( handle->id[1] ) close( handle->id[1] );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-void RtApiOss :: startStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiOss::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  stream_.state = STREAM_RUNNING;
-
-  // No need to do anything else here ... OSS automatically starts
-  // when fed samples.
-
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  pthread_cond_signal( &handle->runnable );
-}
-
-void RtApiOss :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiOss::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  // The state might change while waiting on a mutex.
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_UNLOCK( &stream_.mutex );
-    return;
-  }
-
-  int result = 0;
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    // Flush the output with zeros a few times.
-    char *buffer;
-    int samples;
-    RtAudioFormat format;
-
-    if ( stream_.doConvertBuffer[0] ) {
-      buffer = stream_.deviceBuffer;
-      samples = stream_.bufferSize * stream_.nDeviceChannels[0];
-      format = stream_.deviceFormat[0];
-    }
-    else {
-      buffer = stream_.userBuffer[0];
-      samples = stream_.bufferSize * stream_.nUserChannels[0];
-      format = stream_.userFormat;
-    }
-
-    memset( buffer, 0, samples * formatBytes(format) );
-    for ( unsigned int i=0; i<stream_.nBuffers+1; i++ ) {
-      result = write( handle->id[0], buffer, samples * formatBytes(format) );
-      if ( result == -1 ) {
-        errorText_ = "RtApiOss::stopStream: audio write error.";
-        error( RtAudioError::WARNING );
-      }
-    }
-
-    result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      errorStream_ << "RtApiOss::stopStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-    handle->triggered = false;
-  }
-
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
-    result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      errorStream_ << "RtApiOss::stopStream: system error stopping input callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
- unlock:
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( result != -1 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiOss :: abortStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiOss::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  // The state might change while waiting on a mutex.
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_UNLOCK( &stream_.mutex );
-    return;
-  }
-
-  int result = 0;
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      errorStream_ << "RtApiOss::abortStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-    handle->triggered = false;
-  }
-
-  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
-    result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
-    if ( result == -1 ) {
-      errorStream_ << "RtApiOss::abortStream: system error stopping input callback procedure on device (" << stream_.device[0] << ").";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
- unlock:
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( result != -1 ) return;
-  error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiOss :: callbackEvent()
-{
-  OssHandle *handle = (OssHandle *) stream_.apiHandle;
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_LOCK( &stream_.mutex );
-    pthread_cond_wait( &handle->runnable, &stream_.mutex );
-    if ( stream_.state != STREAM_RUNNING ) {
-      MUTEX_UNLOCK( &stream_.mutex );
-      return;
-    }
-    MUTEX_UNLOCK( &stream_.mutex );
-  }
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiOss::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Invoke user callback to get fresh output data.
-  int doStopStream = 0;
-  RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
-  double streamTime = getStreamTime();
-  RtAudioStreamStatus status = 0;
-  if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
-    status |= RTAUDIO_OUTPUT_UNDERFLOW;
-    handle->xrun[0] = false;
-  }
-  if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
-    status |= RTAUDIO_INPUT_OVERFLOW;
-    handle->xrun[1] = false;
-  }
-  doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                           stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
-  if ( doStopStream == 2 ) {
-    this->abortStream();
-    return;
-  }
-
-  MUTEX_LOCK( &stream_.mutex );
-
-  // The state might change while waiting on a mutex.
-  if ( stream_.state == STREAM_STOPPED ) goto unlock;
-
-  int result;
-  char *buffer;
-  int samples;
-  RtAudioFormat format;
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    // Setup parameters and do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[0] ) {
-      buffer = stream_.deviceBuffer;
-      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-      samples = stream_.bufferSize * stream_.nDeviceChannels[0];
-      format = stream_.deviceFormat[0];
-    }
-    else {
-      buffer = stream_.userBuffer[0];
-      samples = stream_.bufferSize * stream_.nUserChannels[0];
-      format = stream_.userFormat;
-    }
-
-    // Do byte swapping if necessary.
-    if ( stream_.doByteSwap[0] )
-      byteSwapBuffer( buffer, samples, format );
-
-    if ( stream_.mode == DUPLEX && handle->triggered == false ) {
-      int trig = 0;
-      ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
-      result = write( handle->id[0], buffer, samples * formatBytes(format) );
-      trig = PCM_ENABLE_INPUT|PCM_ENABLE_OUTPUT;
-      ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
-      handle->triggered = true;
-    }
-    else
-      // Write samples to device.
-      result = write( handle->id[0], buffer, samples * formatBytes(format) );
-
-    if ( result == -1 ) {
-      // We'll assume this is an underrun, though there isn't a
-      // specific means for determining that.
-      handle->xrun[0] = true;
-      errorText_ = "RtApiOss::callbackEvent: audio write error.";
-      error( RtAudioError::WARNING );
-      // Continue on to input section.
-    }
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    // Setup parameters.
-    if ( stream_.doConvertBuffer[1] ) {
-      buffer = stream_.deviceBuffer;
-      samples = stream_.bufferSize * stream_.nDeviceChannels[1];
-      format = stream_.deviceFormat[1];
-    }
-    else {
-      buffer = stream_.userBuffer[1];
-      samples = stream_.bufferSize * stream_.nUserChannels[1];
-      format = stream_.userFormat;
-    }
-
-    // Read samples from device.
-    result = read( handle->id[1], buffer, samples * formatBytes(format) );
-
-    if ( result == -1 ) {
-      // We'll assume this is an overrun, though there isn't a
-      // specific means for determining that.
-      handle->xrun[1] = true;
-      errorText_ = "RtApiOss::callbackEvent: audio read error.";
-      error( RtAudioError::WARNING );
-      goto unlock;
-    }
-
-    // Do byte swapping if necessary.
-    if ( stream_.doByteSwap[1] )
-      byteSwapBuffer( buffer, samples, format );
-
-    // Do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[1] )
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-  }
-
- unlock:
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  RtApi::tickStreamTime();
-  if ( doStopStream == 1 ) this->stopStream();
-}
-
-static void *ossCallbackHandler( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiOss *object = (RtApiOss *) info->object;
-  bool *isRunning = &info->isRunning;
-
-  while ( *isRunning == true ) {
-    pthread_testcancel();
-    object->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-//******************** End of __LINUX_OSS__ *********************//
-#endif
-
-
-// *************************************************** //
-//
-// Protected common (OS-independent) RtAudio methods.
-//
-// *************************************************** //
-
-// This method can be modified to control the behavior of error
-// message printing.
-void RtApi :: error( RtAudioError::Type type )
-{
-  errorStream_.str(""); // clear the ostringstream
-
-  RtAudioErrorCallback errorCallback = (RtAudioErrorCallback) stream_.callbackInfo.errorCallback;
-  if ( errorCallback ) {
-    // abortStream() can generate new error messages. Ignore them. Just keep original one.
-
-    if ( firstErrorOccurred_ )
-      return;
-
-    firstErrorOccurred_ = true;
-    const std::string errorMessage = errorText_;
-
-    if ( type != RtAudioError::WARNING && stream_.state != STREAM_STOPPED) {
-      stream_.callbackInfo.isRunning = false; // exit from the thread
-      abortStream();
-    }
-
-    errorCallback( type, errorMessage );
-    firstErrorOccurred_ = false;
-    return;
-  }
-
-  if ( type == RtAudioError::WARNING && showWarnings_ == true )
-    std::cerr << '\n' << errorText_ << "\n\n";
-  else if ( type != RtAudioError::WARNING )
-    throw( RtAudioError( errorText_, type ) );
-}
-
-void RtApi :: verifyStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApi:: a stream is not open!";
-    error( RtAudioError::INVALID_USE );
-  }
-}
-
-void RtApi :: clearStreamInfo()
-{
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-  stream_.sampleRate = 0;
-  stream_.bufferSize = 0;
-  stream_.nBuffers = 0;
-  stream_.userFormat = 0;
-  stream_.userInterleaved = true;
-  stream_.streamTime = 0.0;
-  stream_.apiHandle = 0;
-  stream_.deviceBuffer = 0;
-  stream_.callbackInfo.callback = 0;
-  stream_.callbackInfo.userData = 0;
-  stream_.callbackInfo.isRunning = false;
-  stream_.callbackInfo.errorCallback = 0;
-  for ( int i=0; i<2; i++ ) {
-    stream_.device[i] = 11111;
-    stream_.doConvertBuffer[i] = false;
-    stream_.deviceInterleaved[i] = true;
-    stream_.doByteSwap[i] = false;
-    stream_.nUserChannels[i] = 0;
-    stream_.nDeviceChannels[i] = 0;
-    stream_.channelOffset[i] = 0;
-    stream_.deviceFormat[i] = 0;
-    stream_.latency[i] = 0;
-    stream_.userBuffer[i] = 0;
-    stream_.convertInfo[i].channels = 0;
-    stream_.convertInfo[i].inJump = 0;
-    stream_.convertInfo[i].outJump = 0;
-    stream_.convertInfo[i].inFormat = 0;
-    stream_.convertInfo[i].outFormat = 0;
-    stream_.convertInfo[i].inOffset.clear();
-    stream_.convertInfo[i].outOffset.clear();
-  }
-}
-
-unsigned int RtApi :: formatBytes( RtAudioFormat format )
-{
-  if ( format == RTAUDIO_SINT16 )
-    return 2;
-  else if ( format == RTAUDIO_SINT32 || format == RTAUDIO_FLOAT32 )
-    return 4;
-  else if ( format == RTAUDIO_FLOAT64 )
-    return 8;
-  else if ( format == RTAUDIO_SINT24 )
-    return 3;
-  else if ( format == RTAUDIO_SINT8 )
-    return 1;
-
-  errorText_ = "RtApi::formatBytes: undefined format.";
-  error( RtAudioError::WARNING );
-
-  return 0;
-}
-
-void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel )
-{
-  if ( mode == INPUT ) { // convert device to user buffer
-    stream_.convertInfo[mode].inJump = stream_.nDeviceChannels[1];
-    stream_.convertInfo[mode].outJump = stream_.nUserChannels[1];
-    stream_.convertInfo[mode].inFormat = stream_.deviceFormat[1];
-    stream_.convertInfo[mode].outFormat = stream_.userFormat;
-  }
-  else { // convert user to device buffer
-    stream_.convertInfo[mode].inJump = stream_.nUserChannels[0];
-    stream_.convertInfo[mode].outJump = stream_.nDeviceChannels[0];
-    stream_.convertInfo[mode].inFormat = stream_.userFormat;
-    stream_.convertInfo[mode].outFormat = stream_.deviceFormat[0];
-  }
-
-  if ( stream_.convertInfo[mode].inJump < stream_.convertInfo[mode].outJump )
-    stream_.convertInfo[mode].channels = stream_.convertInfo[mode].inJump;
-  else
-    stream_.convertInfo[mode].channels = stream_.convertInfo[mode].outJump;
-
-  // Set up the interleave/deinterleave offsets.
-  if ( stream_.deviceInterleaved[mode] != stream_.userInterleaved ) {
-    if ( ( mode == OUTPUT && stream_.deviceInterleaved[mode] ) ||
-         ( mode == INPUT && stream_.userInterleaved ) ) {
-      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
-        stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize );
-        stream_.convertInfo[mode].outOffset.push_back( k );
-        stream_.convertInfo[mode].inJump = 1;
-      }
-    }
-    else {
-      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
-        stream_.convertInfo[mode].inOffset.push_back( k );
-        stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize );
-        stream_.convertInfo[mode].outJump = 1;
-      }
-    }
-  }
-  else { // no (de)interleaving
-    if ( stream_.userInterleaved ) {
-      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
-        stream_.convertInfo[mode].inOffset.push_back( k );
-        stream_.convertInfo[mode].outOffset.push_back( k );
-      }
-    }
-    else {
-      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
-        stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize );
-        stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize );
-        stream_.convertInfo[mode].inJump = 1;
-        stream_.convertInfo[mode].outJump = 1;
-      }
-    }
-  }
-
-  // Add channel offset.
-  if ( firstChannel > 0 ) {
-    if ( stream_.deviceInterleaved[mode] ) {
-      if ( mode == OUTPUT ) {
-        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
-          stream_.convertInfo[mode].outOffset[k] += firstChannel;
-      }
-      else {
-        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
-          stream_.convertInfo[mode].inOffset[k] += firstChannel;
-      }
-    }
-    else {
-      if ( mode == OUTPUT ) {
-        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
-          stream_.convertInfo[mode].outOffset[k] += ( firstChannel * stream_.bufferSize );
-      }
-      else {
-        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
-          stream_.convertInfo[mode].inOffset[k] += ( firstChannel  * stream_.bufferSize );
-      }
-    }
-  }
-}
-
-void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info )
-{
-  // This function does format conversion, input/output channel compensation, and
-  // data interleaving/deinterleaving.  24-bit integers are assumed to occupy
-  // the lower three bytes of a 32-bit integer.
-
-  // Clear our device buffer when in/out duplex device channels are different
-  if ( outBuffer == stream_.deviceBuffer && stream_.mode == DUPLEX &&
-       ( stream_.nDeviceChannels[0] < stream_.nDeviceChannels[1] ) )
-    memset( outBuffer, 0, stream_.bufferSize * info.outJump * formatBytes( info.outFormat ) );
-
-  int j;
-  if (info.outFormat == RTAUDIO_FLOAT64) {
-    Float64 scale;
-    Float64 *out = (Float64 *)outBuffer;
-
-    if (info.inFormat == RTAUDIO_SINT8) {
-      signed char *in = (signed char *)inBuffer;
-      scale = 1.0 / 127.5;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT16) {
-      Int16 *in = (Int16 *)inBuffer;
-      scale = 1.0 / 32767.5;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT24) {
-      Int24 *in = (Int24 *)inBuffer;
-      scale = 1.0 / 8388607.5;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float64) (in[info.inOffset[j]].asInt());
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT32) {
-      Int32 *in = (Int32 *)inBuffer;
-      scale = 1.0 / 2147483647.5;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT32) {
-      Float32 *in = (Float32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT64) {
-      // Channel compensation and/or (de)interleaving only.
-      Float64 *in = (Float64 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-  }
-  else if (info.outFormat == RTAUDIO_FLOAT32) {
-    Float32 scale;
-    Float32 *out = (Float32 *)outBuffer;
-
-    if (info.inFormat == RTAUDIO_SINT8) {
-      signed char *in = (signed char *)inBuffer;
-      scale = (Float32) ( 1.0 / 127.5 );
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT16) {
-      Int16 *in = (Int16 *)inBuffer;
-      scale = (Float32) ( 1.0 / 32767.5 );
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT24) {
-      Int24 *in = (Int24 *)inBuffer;
-      scale = (Float32) ( 1.0 / 8388607.5 );
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float32) (in[info.inOffset[j]].asInt());
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT32) {
-      Int32 *in = (Int32 *)inBuffer;
-      scale = (Float32) ( 1.0 / 2147483647.5 );
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
-          out[info.outOffset[j]] += 0.5;
-          out[info.outOffset[j]] *= scale;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT32) {
-      // Channel compensation and/or (de)interleaving only.
-      Float32 *in = (Float32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT64) {
-      Float64 *in = (Float64 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-  }
-  else if (info.outFormat == RTAUDIO_SINT32) {
-    Int32 *out = (Int32 *)outBuffer;
-    if (info.inFormat == RTAUDIO_SINT8) {
-      signed char *in = (signed char *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
-          out[info.outOffset[j]] <<= 24;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT16) {
-      Int16 *in = (Int16 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
-          out[info.outOffset[j]] <<= 16;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT24) {
-      Int24 *in = (Int24 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]].asInt();
-          out[info.outOffset[j]] <<= 8;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT32) {
-      // Channel compensation and/or (de)interleaving only.
-      Int32 *in = (Int32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT32) {
-      Float32 *in = (Float32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT64) {
-      Float64 *in = (Float64 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-  }
-  else if (info.outFormat == RTAUDIO_SINT24) {
-    Int24 *out = (Int24 *)outBuffer;
-    if (info.inFormat == RTAUDIO_SINT8) {
-      signed char *in = (signed char *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 16);
-          //out[info.outOffset[j]] <<= 16;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT16) {
-      Int16 *in = (Int16 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 8);
-          //out[info.outOffset[j]] <<= 8;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT24) {
-      // Channel compensation and/or (de)interleaving only.
-      Int24 *in = (Int24 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT32) {
-      Int32 *in = (Int32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] >> 8);
-          //out[info.outOffset[j]] >>= 8;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT32) {
-      Float32 *in = (Float32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT64) {
-      Float64 *in = (Float64 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-  }
-  else if (info.outFormat == RTAUDIO_SINT16) {
-    Int16 *out = (Int16 *)outBuffer;
-    if (info.inFormat == RTAUDIO_SINT8) {
-      signed char *in = (signed char *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int16) in[info.inOffset[j]];
-          out[info.outOffset[j]] <<= 8;
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT16) {
-      // Channel compensation and/or (de)interleaving only.
-      Int16 *in = (Int16 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT24) {
-      Int24 *in = (Int24 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]].asInt() >> 8);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT32) {
-      Int32 *in = (Int32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int16) ((in[info.inOffset[j]] >> 16) & 0x0000ffff);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT32) {
-      Float32 *in = (Float32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT64) {
-      Float64 *in = (Float64 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-  }
-  else if (info.outFormat == RTAUDIO_SINT8) {
-    signed char *out = (signed char *)outBuffer;
-    if (info.inFormat == RTAUDIO_SINT8) {
-      // Channel compensation and/or (de)interleaving only.
-      signed char *in = (signed char *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = in[info.inOffset[j]];
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    if (info.inFormat == RTAUDIO_SINT16) {
-      Int16 *in = (Int16 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 8) & 0x00ff);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT24) {
-      Int24 *in = (Int24 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]].asInt() >> 16);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_SINT32) {
-      Int32 *in = (Int32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 24) & 0x000000ff);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT32) {
-      Float32 *in = (Float32 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-    else if (info.inFormat == RTAUDIO_FLOAT64) {
-      Float64 *in = (Float64 *)inBuffer;
-      for (unsigned int i=0; i<stream_.bufferSize; i++) {
-        for (j=0; j<info.channels; j++) {
-          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
-        }
-        in += info.inJump;
-        out += info.outJump;
-      }
-    }
-  }
-}
-
-//static inline uint16_t bswap_16(uint16_t x) { return (x>>8) | (x<<8); }
-//static inline uint32_t bswap_32(uint32_t x) { return (bswap_16(x&0xffff)<<16) | (bswap_16(x>>16)); }
-//static inline uint64_t bswap_64(uint64_t x) { return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32)); }
-
-void RtApi :: byteSwapBuffer( char *buffer, unsigned int samples, RtAudioFormat format )
-{
-  register char val;
-  register char *ptr;
-
-  ptr = buffer;
-  if ( format == RTAUDIO_SINT16 ) {
-    for ( unsigned int i=0; i<samples; i++ ) {
-      // Swap 1st and 2nd bytes.
-      val = *(ptr);
-      *(ptr) = *(ptr+1);
-      *(ptr+1) = val;
-
-      // Increment 2 bytes.
-      ptr += 2;
-    }
-  }
-  else if ( format == RTAUDIO_SINT32 ||
-            format == RTAUDIO_FLOAT32 ) {
-    for ( unsigned int i=0; i<samples; i++ ) {
-      // Swap 1st and 4th bytes.
-      val = *(ptr);
-      *(ptr) = *(ptr+3);
-      *(ptr+3) = val;
-
-      // Swap 2nd and 3rd bytes.
-      ptr += 1;
-      val = *(ptr);
-      *(ptr) = *(ptr+1);
-      *(ptr+1) = val;
-
-      // Increment 3 more bytes.
-      ptr += 3;
-    }
-  }
-  else if ( format == RTAUDIO_SINT24 ) {
-    for ( unsigned int i=0; i<samples; i++ ) {
-      // Swap 1st and 3rd bytes.
-      val = *(ptr);
-      *(ptr) = *(ptr+2);
-      *(ptr+2) = val;
-
-      // Increment 2 more bytes.
-      ptr += 2;
-    }
-  }
-  else if ( format == RTAUDIO_FLOAT64 ) {
-    for ( unsigned int i=0; i<samples; i++ ) {
-      // Swap 1st and 8th bytes
-      val = *(ptr);
-      *(ptr) = *(ptr+7);
-      *(ptr+7) = val;
-
-      // Swap 2nd and 7th bytes
-      ptr += 1;
-      val = *(ptr);
-      *(ptr) = *(ptr+5);
-      *(ptr+5) = val;
-
-      // Swap 3rd and 6th bytes
-      ptr += 1;
-      val = *(ptr);
-      *(ptr) = *(ptr+3);
-      *(ptr+3) = val;
-
-      // Swap 4th and 5th bytes
-      ptr += 1;
-      val = *(ptr);
-      *(ptr) = *(ptr+1);
-      *(ptr+1) = val;
-
-      // Increment 5 more bytes.
-      ptr += 5;
-    }
-  }
-}
-
-  // Indentation settings for Vim and Emacs
-  //
-  // Local Variables:
-  // c-basic-offset: 2
-  // indent-tabs-mode: nil
-  // End:
-  //
-  // vim: et sts=2 sw=2
-
-#endif
+#ifdef RTAUDIO_ENABLED
+/************************************************************************/
+/*! \class RtAudio
+    \brief Realtime audio i/o C++ classes.
+
+    RtAudio provides a common API (Application Programming Interface)
+    for realtime audio input/output across Linux (native ALSA, Jack,
+    and OSS), Macintosh OS X (CoreAudio and Jack), and Windows
+    (DirectSound, ASIO and WASAPI) operating systems.
+
+    RtAudio WWW site: http://www.music.mcgill.ca/~gary/rtaudio/
+
+    RtAudio: realtime audio i/o C++ classes
+    Copyright (c) 2001-2014 Gary P. Scavone
+
+    Permission is hereby granted, free of charge, to any person
+    obtaining a copy of this software and associated documentation files
+    (the "Software"), to deal in the Software without restriction,
+    including without limitation the rights to use, copy, modify, merge,
+    publish, distribute, sublicense, and/or sell copies of the Software,
+    and to permit persons to whom the Software is furnished to do so,
+    subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    Any person wishing to distribute modifications to the Software is
+    asked to send the modifications to the original developer so that
+    they can be incorporated into the canonical version.  This is,
+    however, not a binding provision of this license.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+/************************************************************************/
+
+// RtAudio: Version 4.1.1
+
+#include "RtAudio.h"
+#include <iostream>
+#include <cstdlib>
+#include <cstring>
+#include <climits>
+#include <algorithm>
+
+// Static variable definitions.
+const unsigned int RtApi::MAX_SAMPLE_RATES = 14;
+const unsigned int RtApi::SAMPLE_RATES[] = {
+  4000, 5512, 8000, 9600, 11025, 16000, 22050,
+  32000, 44100, 48000, 88200, 96000, 176400, 192000
+};
+
+#if defined(__WINDOWS_DS__) || defined(__WINDOWS_ASIO__) || defined(__WINDOWS_WASAPI__)
+#ifdef WINRT_ENABLED
+  #define MUTEX_INITIALIZE(A) InitializeCriticalSectionEx(A, 0, 0)
+#else
+  #define MUTEX_INITIALIZE(A) InitializeCriticalSection(A)
+#endif
+  #define MUTEX_DESTROY(A)    DeleteCriticalSection(A)
+  #define MUTEX_LOCK(A)       EnterCriticalSection(A)
+  #define MUTEX_UNLOCK(A)     LeaveCriticalSection(A)
+
+  #include "tchar.h"
+
+  static std::string convertCharPointerToStdString(const char *text)
+  {
+    return std::string(text);
+  }
+
+  static std::string convertCharPointerToStdString(const wchar_t *text)
+  {
+    int length = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
+    std::string s( length-1, '\0' );
+    WideCharToMultiByte(CP_UTF8, 0, text, -1, &s[0], length, NULL, NULL);
+    return s;
+  }
+
+#elif defined(__LINUX_ALSA__) || defined(__LINUX_PULSE__) || defined(__UNIX_JACK__) || defined(__LINUX_OSS__) || defined(__MACOSX_CORE__)
+  // pthread API
+  #define MUTEX_INITIALIZE(A) pthread_mutex_init(A, NULL)
+  #define MUTEX_DESTROY(A)    pthread_mutex_destroy(A)
+  #define MUTEX_LOCK(A)       pthread_mutex_lock(A)
+  #define MUTEX_UNLOCK(A)     pthread_mutex_unlock(A)
+#else
+  #define MUTEX_INITIALIZE(A) abs(*A) // dummy definitions
+  #define MUTEX_DESTROY(A)    abs(*A) // dummy definitions
+#endif
+
+// *************************************************** //
+//
+// RtAudio definitions.
+//
+// *************************************************** //
+
+std::string RtAudio :: getVersion( void ) throw()
+{
+  return RTAUDIO_VERSION;
+}
+
+void RtAudio :: getCompiledApi( std::vector<RtAudio::Api> &apis ) throw()
+{
+  apis.clear();
+
+  // The order here will control the order of RtAudio's API search in
+  // the constructor.
+#if defined(__UNIX_JACK__)
+  apis.push_back( UNIX_JACK );
+#endif
+#if defined(__LINUX_ALSA__)
+  apis.push_back( LINUX_ALSA );
+#endif
+#if defined(__LINUX_PULSE__)
+  apis.push_back( LINUX_PULSE );
+#endif
+#if defined(__LINUX_OSS__)
+  apis.push_back( LINUX_OSS );
+#endif
+#if defined(__WINDOWS_ASIO__)
+  apis.push_back( WINDOWS_ASIO );
+#endif
+#if defined(__WINDOWS_WASAPI__)
+  apis.push_back( WINDOWS_WASAPI );
+#endif
+#if defined(__WINDOWS_DS__)
+  apis.push_back( WINDOWS_DS );
+#endif
+#if defined(__MACOSX_CORE__)
+  apis.push_back( MACOSX_CORE );
+#endif
+#if defined(__RTAUDIO_DUMMY__)
+  apis.push_back( RTAUDIO_DUMMY );
+#endif
+}
+
+void RtAudio :: openRtApi( RtAudio::Api api )
+{
+  if ( rtapi_ )
+    delete rtapi_;
+  rtapi_ = 0;
+
+#if defined(__UNIX_JACK__)
+  if ( api == UNIX_JACK )
+    rtapi_ = new RtApiJack();
+#endif
+#if defined(__LINUX_ALSA__)
+  if ( api == LINUX_ALSA )
+    rtapi_ = new RtApiAlsa();
+#endif
+#if defined(__LINUX_PULSE__)
+  if ( api == LINUX_PULSE )
+    rtapi_ = new RtApiPulse();
+#endif
+#if defined(__LINUX_OSS__)
+  if ( api == LINUX_OSS )
+    rtapi_ = new RtApiOss();
+#endif
+#if defined(__WINDOWS_ASIO__)
+  if ( api == WINDOWS_ASIO )
+    rtapi_ = new RtApiAsio();
+#endif
+#if defined(__WINDOWS_WASAPI__)
+  if ( api == WINDOWS_WASAPI )
+    rtapi_ = new RtApiWasapi();
+#endif
+#if defined(__WINDOWS_DS__)
+  if ( api == WINDOWS_DS )
+    rtapi_ = new RtApiDs();
+#endif
+#if defined(__MACOSX_CORE__)
+  if ( api == MACOSX_CORE )
+    rtapi_ = new RtApiCore();
+#endif
+#if defined(__RTAUDIO_DUMMY__)
+  if ( api == RTAUDIO_DUMMY )
+    rtapi_ = new RtApiDummy();
+#endif
+}
+
+RtAudio :: RtAudio( RtAudio::Api api )
+{
+  rtapi_ = 0;
+
+  if ( api != UNSPECIFIED ) {
+    // Attempt to open the specified API.
+    openRtApi( api );
+    if ( rtapi_ ) return;
+
+    // No compiled support for specified API value.  Issue a debug
+    // warning and continue as if no API was specified.
+    std::cerr << "\nRtAudio: no compiled support for specified API argument!\n" << std::endl;
+  }
+
+  // Iterate through the compiled APIs and return as soon as we find
+  // one with at least one device or we reach the end of the list.
+  std::vector< RtAudio::Api > apis;
+  getCompiledApi( apis );
+  for ( unsigned int i=0; i<apis.size(); i++ ) {
+    openRtApi( apis[i] );
+    if ( rtapi_ && rtapi_->getDeviceCount() ) break;
+  }
+
+  if ( rtapi_ ) return;
+
+  // It should not be possible to get here because the preprocessor
+  // definition __RTAUDIO_DUMMY__ is automatically defined if no
+  // API-specific definitions are passed to the compiler. But just in
+  // case something weird happens, we'll thow an error.
+  std::string errorText = "\nRtAudio: no compiled API support found ... critical error!!\n\n";
+  throw( RtAudioError( errorText, RtAudioError::UNSPECIFIED ) );
+}
+
+RtAudio :: ~RtAudio() throw()
+{
+  if ( rtapi_ )
+    delete rtapi_;
+}
+
+void RtAudio :: openStream( RtAudio::StreamParameters *outputParameters,
+                            RtAudio::StreamParameters *inputParameters,
+                            RtAudioFormat format, unsigned int sampleRate,
+                            unsigned int *bufferFrames,
+                            RtAudioCallback callback, void *userData,
+                            RtAudio::StreamOptions *options,
+                            RtAudioErrorCallback errorCallback )
+{
+  return rtapi_->openStream( outputParameters, inputParameters, format,
+                             sampleRate, bufferFrames, callback,
+                             userData, options, errorCallback );
+}
+
+// *************************************************** //
+//
+// Public RtApi definitions (see end of file for
+// private or protected utility functions).
+//
+// *************************************************** //
+
+RtApi :: RtApi()
+{
+  stream_.state = STREAM_CLOSED;
+  stream_.mode = UNINITIALIZED;
+  stream_.apiHandle = 0;
+  stream_.userBuffer[0] = 0;
+  stream_.userBuffer[1] = 0;
+  MUTEX_INITIALIZE( &stream_.mutex );
+  showWarnings_ = true;
+  firstErrorOccurred_ = false;
+}
+
+RtApi :: ~RtApi()
+{
+  MUTEX_DESTROY( &stream_.mutex );
+}
+
+void RtApi :: openStream( RtAudio::StreamParameters *oParams,
+                          RtAudio::StreamParameters *iParams,
+                          RtAudioFormat format, unsigned int sampleRate,
+                          unsigned int *bufferFrames,
+                          RtAudioCallback callback, void *userData,
+                          RtAudio::StreamOptions *options,
+                          RtAudioErrorCallback errorCallback )
+{
+  if ( stream_.state != STREAM_CLOSED ) {
+    errorText_ = "RtApi::openStream: a stream is already open!";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  // Clear stream information potentially left from a previously open stream.
+  clearStreamInfo();
+
+  if ( oParams && oParams->nChannels < 1 ) {
+    errorText_ = "RtApi::openStream: a non-NULL output StreamParameters structure cannot have an nChannels value less than one.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  if ( iParams && iParams->nChannels < 1 ) {
+    errorText_ = "RtApi::openStream: a non-NULL input StreamParameters structure cannot have an nChannels value less than one.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  if ( oParams == NULL && iParams == NULL ) {
+    errorText_ = "RtApi::openStream: input and output StreamParameters structures are both NULL!";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  if ( formatBytes(format) == 0 ) {
+    errorText_ = "RtApi::openStream: 'format' parameter value is undefined.";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+
+  unsigned int nDevices = getDeviceCount();
+  unsigned int oChannels = 0;
+  if ( oParams ) {
+    oChannels = oParams->nChannels;
+    if ( oParams->deviceId >= nDevices ) {
+      errorText_ = "RtApi::openStream: output device parameter value is invalid.";
+      error( RtAudioError::INVALID_USE );
+      return;
+    }
+  }
+
+  unsigned int iChannels = 0;
+  if ( iParams ) {
+    iChannels = iParams->nChannels;
+    if ( iParams->deviceId >= nDevices ) {
+      errorText_ = "RtApi::openStream: input device parameter value is invalid.";
+      error( RtAudioError::INVALID_USE );
+      return;
+    }
+  }
+
+  bool result;
+
+  if ( oChannels > 0 ) {
+
+    result = probeDeviceOpen( oParams->deviceId, OUTPUT, oChannels, oParams->firstChannel,
+                              sampleRate, format, bufferFrames, options );
+    if ( result == false ) {
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  if ( iChannels > 0 ) {
+
+    result = probeDeviceOpen( iParams->deviceId, INPUT, iChannels, iParams->firstChannel,
+                              sampleRate, format, bufferFrames, options );
+    if ( result == false ) {
+      if ( oChannels > 0 ) closeStream();
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  stream_.callbackInfo.callback = (void *) callback;
+  stream_.callbackInfo.userData = userData;
+  stream_.callbackInfo.errorCallback = (void *) errorCallback;
+
+  if ( options ) options->numberOfBuffers = stream_.nBuffers;
+  stream_.state = STREAM_STOPPED;
+}
+
+unsigned int RtApi :: getDefaultInputDevice( void )
+{
+  // Should be implemented in subclasses if possible.
+  return 0;
+}
+
+unsigned int RtApi :: getDefaultOutputDevice( void )
+{
+  // Should be implemented in subclasses if possible.
+  return 0;
+}
+
+void RtApi :: closeStream( void )
+{
+  // MUST be implemented in subclasses!
+  return;
+}
+
+bool RtApi :: probeDeviceOpen( unsigned int /*device*/, StreamMode /*mode*/, unsigned int /*channels*/,
+                               unsigned int /*firstChannel*/, unsigned int /*sampleRate*/,
+                               RtAudioFormat /*format*/, unsigned int * /*bufferSize*/,
+                               RtAudio::StreamOptions * /*options*/ )
+{
+  // MUST be implemented in subclasses!
+  return FAILURE;
+}
+
+void RtApi :: tickStreamTime( void )
+{
+  // Subclasses that do not provide their own implementation of
+  // getStreamTime should call this function once per buffer I/O to
+  // provide basic stream time support.
+
+  stream_.streamTime += ( stream_.bufferSize * 1.0 / stream_.sampleRate );
+
+#if defined( HAVE_GETTIMEOFDAY )
+  gettimeofday( &stream_.lastTickTimestamp, NULL );
+#endif
+}
+
+long RtApi :: getStreamLatency( void )
+{
+  verifyStream();
+
+  long totalLatency = 0;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
+    totalLatency = stream_.latency[0];
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
+    totalLatency += stream_.latency[1];
+
+  return totalLatency;
+}
+
+double RtApi :: getStreamTime( void )
+{
+  verifyStream();
+
+#if defined( HAVE_GETTIMEOFDAY )
+  // Return a very accurate estimate of the stream time by
+  // adding in the elapsed time since the last tick.
+  struct timeval then;
+  struct timeval now;
+
+  if ( stream_.state != STREAM_RUNNING || stream_.streamTime == 0.0 )
+    return stream_.streamTime;
+
+  gettimeofday( &now, NULL );
+  then = stream_.lastTickTimestamp;
+  return stream_.streamTime +
+    ((now.tv_sec + 0.000001 * now.tv_usec) -
+     (then.tv_sec + 0.000001 * then.tv_usec));     
+#else
+  return stream_.streamTime;
+#endif
+}
+
+void RtApi :: setStreamTime( double time )
+{
+  verifyStream();
+
+  if ( time >= 0.0 )
+    stream_.streamTime = time;
+}
+
+unsigned int RtApi :: getStreamSampleRate( void )
+{
+ verifyStream();
+
+ return stream_.sampleRate;
+}
+
+
+// *************************************************** //
+//
+// OS/API-specific methods.
+//
+// *************************************************** //
+
+#if defined(__MACOSX_CORE__)
+
+// The OS X CoreAudio API is designed to use a separate callback
+// procedure for each of its audio devices.  A single RtAudio duplex
+// stream using two different devices is supported here, though it
+// cannot be guaranteed to always behave correctly because we cannot
+// synchronize these two callbacks.
+//
+// A property listener is installed for over/underrun information.
+// However, no functionality is currently provided to allow property
+// listeners to trigger user handlers because it is unclear what could
+// be done if a critical stream parameter (buffer size, sample rate,
+// device disconnect) notification arrived.  The listeners entail
+// quite a bit of extra code and most likely, a user program wouldn't
+// be prepared for the result anyway.  However, we do provide a flag
+// to the client callback function to inform of an over/underrun.
+
+// A structure to hold various information related to the CoreAudio API
+// implementation.
+struct CoreHandle {
+  AudioDeviceID id[2];    // device ids
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+  AudioDeviceIOProcID procId[2];
+#endif
+  UInt32 iStream[2];      // device stream index (or first if using multiple)
+  UInt32 nStreams[2];     // number of streams to use
+  bool xrun[2];
+  char *deviceBuffer;
+  pthread_cond_t condition;
+  int drainCounter;       // Tracks callback counts when draining
+  bool internalDrain;     // Indicates if stop is initiated from callback or not.
+
+  CoreHandle()
+    :deviceBuffer(0), drainCounter(0), internalDrain(false) { nStreams[0] = 1; nStreams[1] = 1; id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; }
+};
+
+RtApiCore:: RtApiCore()
+{
+#if defined( AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER )
+  // This is a largely undocumented but absolutely necessary
+  // requirement starting with OS-X 10.6.  If not called, queries and
+  // updates to various audio device properties are not handled
+  // correctly.
+  CFRunLoopRef theRunLoop = NULL;
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyRunLoop,
+                                          kAudioObjectPropertyScopeGlobal,
+                                          kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectSetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, sizeof(CFRunLoopRef), &theRunLoop);
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::RtApiCore: error setting run loop property!";
+    error( RtAudioError::WARNING );
+  }
+#endif
+}
+
+RtApiCore :: ~RtApiCore()
+{
+  // The subclass destructor gets called before the base class
+  // destructor, so close an existing stream before deallocating
+  // apiDeviceId memory.
+  if ( stream_.state != STREAM_CLOSED ) closeStream();
+}
+
+unsigned int RtApiCore :: getDeviceCount( void )
+{
+  // Find out how many audio devices there are, if any.
+  UInt32 dataSize;
+  AudioObjectPropertyAddress propertyAddress = { kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyDataSize( kAudioObjectSystemObject, &propertyAddress, 0, NULL, &dataSize );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDeviceCount: OS-X error getting device info!";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  return dataSize / sizeof( AudioDeviceID );
+}
+
+unsigned int RtApiCore :: getDefaultInputDevice( void )
+{
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices <= 1 ) return 0;
+
+  AudioDeviceID id;
+  UInt32 dataSize = sizeof( AudioDeviceID );
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  dataSize *= nDevices;
+  AudioDeviceID deviceList[ nDevices ];
+  property.mSelector = kAudioHardwarePropertyDevices;
+  result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device IDs.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  for ( unsigned int i=0; i<nDevices; i++ )
+    if ( id == deviceList[i] ) return i;
+
+  errorText_ = "RtApiCore::getDefaultInputDevice: No default device found!";
+  error( RtAudioError::WARNING );
+  return 0;
+}
+
+unsigned int RtApiCore :: getDefaultOutputDevice( void )
+{
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices <= 1 ) return 0;
+
+  AudioDeviceID id;
+  UInt32 dataSize = sizeof( AudioDeviceID );
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  dataSize = sizeof( AudioDeviceID ) * nDevices;
+  AudioDeviceID deviceList[ nDevices ];
+  property.mSelector = kAudioHardwarePropertyDevices;
+  result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device IDs.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  for ( unsigned int i=0; i<nDevices; i++ )
+    if ( id == deviceList[i] ) return i;
+
+  errorText_ = "RtApiCore::getDefaultOutputDevice: No default device found!";
+  error( RtAudioError::WARNING );
+  return 0;
+}
+
+RtAudio::DeviceInfo RtApiCore :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  // Get device ID
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices == 0 ) {
+    errorText_ = "RtApiCore::getDeviceInfo: no devices found!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  if ( device >= nDevices ) {
+    errorText_ = "RtApiCore::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  AudioDeviceID deviceList[ nDevices ];
+  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
+                                          kAudioObjectPropertyScopeGlobal,
+                                          kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
+                                                0, NULL, &dataSize, (void *) &deviceList );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::getDeviceInfo: OS-X system error getting device IDs.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  AudioDeviceID id = deviceList[ device ];
+
+  // Get the device name.
+  info.name.erase();
+  CFStringRef cfname;
+  dataSize = sizeof( CFStringRef );
+  property.mSelector = kAudioObjectPropertyManufacturer;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device manufacturer.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  //const char *mname = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() );
+  int length = CFStringGetLength(cfname);
+  char *mname = (char *)malloc(length * 3 + 1);
+#if defined( UNICODE ) || defined( _UNICODE )
+  CFStringGetCString(cfname, mname, length * 3 + 1, kCFStringEncodingUTF8);
+#else
+  CFStringGetCString(cfname, mname, length * 3 + 1, CFStringGetSystemEncoding());
+#endif
+  info.name.append( (const char *)mname, strlen(mname) );
+  info.name.append( ": " );
+  CFRelease( cfname );
+  free(mname);
+
+  property.mSelector = kAudioObjectPropertyName;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device name.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  //const char *name = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() );
+  length = CFStringGetLength(cfname);
+  char *name = (char *)malloc(length * 3 + 1);
+#if defined( UNICODE ) || defined( _UNICODE )
+  CFStringGetCString(cfname, name, length * 3 + 1, kCFStringEncodingUTF8);
+#else
+  CFStringGetCString(cfname, name, length * 3 + 1, CFStringGetSystemEncoding());
+#endif
+  info.name.append( (const char *)name, strlen(name) );
+  CFRelease( cfname );
+  free(name);
+
+  // Get the output stream "configuration".
+  AudioBufferList	*bufferList = nil;
+  property.mSelector = kAudioDevicePropertyStreamConfiguration;
+  property.mScope = kAudioDevicePropertyScopeOutput;
+  //  property.mElement = kAudioObjectPropertyElementWildcard;
+  dataSize = 0;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  if ( result != noErr || dataSize == 0 ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration info for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Allocate the AudioBufferList.
+  bufferList = (AudioBufferList *) malloc( dataSize );
+  if ( bufferList == NULL ) {
+    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating output AudioBufferList.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
+  if ( result != noErr || dataSize == 0 ) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get output channel information.
+  unsigned int i, nStreams = bufferList->mNumberBuffers;
+  for ( i=0; i<nStreams; i++ )
+    info.outputChannels += bufferList->mBuffers[i].mNumberChannels;
+  free( bufferList );
+
+  // Get the input stream "configuration".
+  property.mScope = kAudioDevicePropertyScopeInput;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  if ( result != noErr || dataSize == 0 ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration info for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Allocate the AudioBufferList.
+  bufferList = (AudioBufferList *) malloc( dataSize );
+  if ( bufferList == NULL ) {
+    errorText_ = "RtApiCore::getDeviceInfo: memory error allocating input AudioBufferList.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
+  if (result != noErr || dataSize == 0) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get input channel information.
+  nStreams = bufferList->mNumberBuffers;
+  for ( i=0; i<nStreams; i++ )
+    info.inputChannels += bufferList->mBuffers[i].mNumberChannels;
+  free( bufferList );
+
+  // If device opens for both playback and capture, we determine the channels.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  // Probe the device sample rates.
+  bool isInput = false;
+  if ( info.outputChannels == 0 ) isInput = true;
+
+  // Determine the supported sample rates.
+  property.mSelector = kAudioDevicePropertyAvailableNominalSampleRates;
+  if ( isInput == false ) property.mScope = kAudioDevicePropertyScopeOutput;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  if ( result != kAudioHardwareNoError || dataSize == 0 ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rate info.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  UInt32 nRanges = dataSize / sizeof( AudioValueRange );
+  AudioValueRange rangeList[ nRanges ];
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &rangeList );
+  if ( result != kAudioHardwareNoError ) {
+    errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rates.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // The sample rate reporting mechanism is a bit of a mystery.  It
+  // seems that it can either return individual rates or a range of
+  // rates.  I assume that if the min / max range values are the same,
+  // then that represents a single supported rate and if the min / max
+  // range values are different, the device supports an arbitrary
+  // range of values (though there might be multiple ranges, so we'll
+  // use the most conservative range).
+  Float64 minimumRate = 1.0, maximumRate = 10000000000.0;
+  bool haveValueRange = false;
+  info.sampleRates.clear();
+  for ( UInt32 i=0; i<nRanges; i++ ) {
+    if ( rangeList[i].mMinimum == rangeList[i].mMaximum ) {
+      unsigned int tmpSr = (unsigned int) rangeList[i].mMinimum;
+      info.sampleRates.push_back( tmpSr );
+
+      if ( !info.preferredSampleRate || ( tmpSr <= 48000 && tmpSr > info.preferredSampleRate ) )
+        info.preferredSampleRate = tmpSr;
+
+    } else {
+      haveValueRange = true;
+      if ( rangeList[i].mMinimum > minimumRate ) minimumRate = rangeList[i].mMinimum;
+      if ( rangeList[i].mMaximum < maximumRate ) maximumRate = rangeList[i].mMaximum;
+    }
+  }
+
+  if ( haveValueRange ) {
+    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+      if ( SAMPLE_RATES[k] >= (unsigned int) minimumRate && SAMPLE_RATES[k] <= (unsigned int) maximumRate ) {
+        info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+          info.preferredSampleRate = SAMPLE_RATES[k];
+      }
+    }
+  }
+
+  // Sort and remove any redundant values
+  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
+  info.sampleRates.erase( unique( info.sampleRates.begin(), info.sampleRates.end() ), info.sampleRates.end() );
+
+  if ( info.sampleRates.size() == 0 ) {
+    errorStream_ << "RtApiCore::probeDeviceInfo: No supported sample rates found for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // CoreAudio always uses 32-bit floating point data for PCM streams.
+  // Thus, any other "physical" formats supported by the device are of
+  // no interest to the client.
+  info.nativeFormats = RTAUDIO_FLOAT32;
+
+  if ( info.outputChannels > 0 )
+    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
+  if ( info.inputChannels > 0 )
+    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
+
+  info.probed = true;
+  return info;
+}
+
+static OSStatus callbackHandler( AudioDeviceID inDevice,
+                                 const AudioTimeStamp* /*inNow*/,
+                                 const AudioBufferList* inInputData,
+                                 const AudioTimeStamp* /*inInputTime*/,
+                                 AudioBufferList* outOutputData,
+                                 const AudioTimeStamp* /*inOutputTime*/,
+                                 void* infoPointer )
+{
+  CallbackInfo *info = (CallbackInfo *) infoPointer;
+
+  RtApiCore *object = (RtApiCore *) info->object;
+  if ( object->callbackEvent( inDevice, inInputData, outOutputData ) == false )
+    return kAudioHardwareUnspecifiedError;
+  else
+    return kAudioHardwareNoError;
+}
+
+static OSStatus xrunListener( AudioObjectID /*inDevice*/,
+                              UInt32 nAddresses,
+                              const AudioObjectPropertyAddress properties[],
+                              void* handlePointer )
+{
+  CoreHandle *handle = (CoreHandle *) handlePointer;
+  for ( UInt32 i=0; i<nAddresses; i++ ) {
+    if ( properties[i].mSelector == kAudioDeviceProcessorOverload ) {
+      if ( properties[i].mScope == kAudioDevicePropertyScopeInput )
+        handle->xrun[1] = true;
+      else
+        handle->xrun[0] = true;
+    }
+  }
+
+  return kAudioHardwareNoError;
+}
+
+static OSStatus rateListener( AudioObjectID inDevice,
+                              UInt32 /*nAddresses*/,
+                              const AudioObjectPropertyAddress /*properties*/[],
+                              void* ratePointer )
+{
+  Float64 *rate = (Float64 *) ratePointer;
+  UInt32 dataSize = sizeof( Float64 );
+  AudioObjectPropertyAddress property = { kAudioDevicePropertyNominalSampleRate,
+                                          kAudioObjectPropertyScopeGlobal,
+                                          kAudioObjectPropertyElementMaster };
+  AudioObjectGetPropertyData( inDevice, &property, 0, NULL, &dataSize, rate );
+  return kAudioHardwareNoError;
+}
+
+bool RtApiCore :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int *bufferSize,
+                                   RtAudio::StreamOptions *options )
+{
+  // Get device ID
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices == 0 ) {
+    // This should not happen because a check is made before this function is called.
+    errorText_ = "RtApiCore::probeDeviceOpen: no devices found!";
+    return FAILURE;
+  }
+
+  if ( device >= nDevices ) {
+    // This should not happen because a check is made before this function is called.
+    errorText_ = "RtApiCore::probeDeviceOpen: device ID is invalid!";
+    return FAILURE;
+  }
+
+  AudioDeviceID deviceList[ nDevices ];
+  UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
+  AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
+                                          kAudioObjectPropertyScopeGlobal,
+                                          kAudioObjectPropertyElementMaster };
+  OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
+                                                0, NULL, &dataSize, (void *) &deviceList );
+  if ( result != noErr ) {
+    errorText_ = "RtApiCore::probeDeviceOpen: OS-X system error getting device IDs.";
+    return FAILURE;
+  }
+
+  AudioDeviceID id = deviceList[ device ];
+
+  // Setup for stream mode.
+  bool isInput = false;
+  if ( mode == INPUT ) {
+    isInput = true;
+    property.mScope = kAudioDevicePropertyScopeInput;
+  }
+  else
+    property.mScope = kAudioDevicePropertyScopeOutput;
+
+  // Get the stream "configuration".
+  AudioBufferList	*bufferList = nil;
+  dataSize = 0;
+  property.mSelector = kAudioDevicePropertyStreamConfiguration;
+  result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
+  if ( result != noErr || dataSize == 0 ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration info for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Allocate the AudioBufferList.
+  bufferList = (AudioBufferList *) malloc( dataSize );
+  if ( bufferList == NULL ) {
+    errorText_ = "RtApiCore::probeDeviceOpen: memory error allocating AudioBufferList.";
+    return FAILURE;
+  }
+
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
+  if (result != noErr || dataSize == 0) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Search for one or more streams that contain the desired number of
+  // channels. CoreAudio devices can have an arbitrary number of
+  // streams and each stream can have an arbitrary number of channels.
+  // For each stream, a single buffer of interleaved samples is
+  // provided.  RtAudio prefers the use of one stream of interleaved
+  // data or multiple consecutive single-channel streams.  However, we
+  // now support multiple consecutive multi-channel streams of
+  // interleaved data as well.
+  UInt32 iStream, offsetCounter = firstChannel;
+  UInt32 nStreams = bufferList->mNumberBuffers;
+  bool monoMode = false;
+  bool foundStream = false;
+
+  // First check that the device supports the requested number of
+  // channels.
+  UInt32 deviceChannels = 0;
+  for ( iStream=0; iStream<nStreams; iStream++ )
+    deviceChannels += bufferList->mBuffers[iStream].mNumberChannels;
+
+  if ( deviceChannels < ( channels + firstChannel ) ) {
+    free( bufferList );
+    errorStream_ << "RtApiCore::probeDeviceOpen: the device (" << device << ") does not support the requested channel count.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Look for a single stream meeting our needs.
+  UInt32 firstStream, streamCount = 1, streamChannels = 0, channelOffset = 0;
+  for ( iStream=0; iStream<nStreams; iStream++ ) {
+    streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
+    if ( streamChannels >= channels + offsetCounter ) {
+      firstStream = iStream;
+      channelOffset = offsetCounter;
+      foundStream = true;
+      break;
+    }
+    if ( streamChannels > offsetCounter ) break;
+    offsetCounter -= streamChannels;
+  }
+
+  // If we didn't find a single stream above, then we should be able
+  // to meet the channel specification with multiple streams.
+  if ( foundStream == false ) {
+    monoMode = true;
+    offsetCounter = firstChannel;
+    for ( iStream=0; iStream<nStreams; iStream++ ) {
+      streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
+      if ( streamChannels > offsetCounter ) break;
+      offsetCounter -= streamChannels;
+    }
+
+    firstStream = iStream;
+    channelOffset = offsetCounter;
+    Int32 channelCounter = channels + offsetCounter - streamChannels;
+
+    if ( streamChannels > 1 ) monoMode = false;
+    while ( channelCounter > 0 ) {
+      streamChannels = bufferList->mBuffers[++iStream].mNumberChannels;
+      if ( streamChannels > 1 ) monoMode = false;
+      channelCounter -= streamChannels;
+      streamCount++;
+    }
+  }
+
+  free( bufferList );
+
+  // Determine the buffer size.
+  AudioValueRange	bufferRange;
+  dataSize = sizeof( AudioValueRange );
+  property.mSelector = kAudioDevicePropertyBufferFrameSizeRange;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &bufferRange );
+
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting buffer size range for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  if ( bufferRange.mMinimum > *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMinimum;
+  else if ( bufferRange.mMaximum < *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMaximum;
+  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) *bufferSize = (unsigned long) bufferRange.mMinimum;
+
+  // Set the buffer size.  For multiple streams, I'm assuming we only
+  // need to make this setting for the master channel.
+  UInt32 theSize = (UInt32) *bufferSize;
+  dataSize = sizeof( UInt32 );
+  property.mSelector = kAudioDevicePropertyBufferFrameSize;
+  result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &theSize );
+
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting the buffer size for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // If attempting to setup a duplex stream, the bufferSize parameter
+  // MUST be the same in both directions!
+  *bufferSize = theSize;
+  if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  stream_.bufferSize = *bufferSize;
+  stream_.nBuffers = 1;
+
+  // Try to set "hog" mode ... it's not clear to me this is working.
+  if ( options && options->flags & RTAUDIO_HOG_DEVICE ) {
+    pid_t hog_pid;
+    dataSize = sizeof( hog_pid );
+    property.mSelector = kAudioDevicePropertyHogMode;
+    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &hog_pid );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting 'hog' state!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    if ( hog_pid != getpid() ) {
+      hog_pid = getpid();
+      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &hog_pid );
+      if ( result != noErr ) {
+        errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting 'hog' state!";
+        errorText_ = errorStream_.str();
+        return FAILURE;
+      }
+    }
+  }
+
+  // Check and if necessary, change the sample rate for the device.
+  Float64 nominalRate;
+  dataSize = sizeof( Float64 );
+  property.mSelector = kAudioDevicePropertyNominalSampleRate;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &nominalRate );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting current sample rate.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Only change the sample rate if off by more than 1 Hz.
+  if ( fabs( nominalRate - (double)sampleRate ) > 1.0 ) {
+
+    // Set a property listener for the sample rate change
+    Float64 reportedRate = 0.0;
+    AudioObjectPropertyAddress tmp = { kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
+    result = AudioObjectAddPropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate property listener for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    nominalRate = (Float64) sampleRate;
+    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &nominalRate );
+    if ( result != noErr ) {
+      AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Now wait until the reported nominal rate is what we just set.
+    UInt32 microCounter = 0;
+    while ( reportedRate != nominalRate ) {
+      microCounter += 5000;
+      if ( microCounter > 5000000 ) break;
+      usleep( 5000 );
+    }
+
+    // Remove the property listener.
+    AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
+
+    if ( microCounter > 5000000 ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: timeout waiting for sample rate update for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // Now set the stream format for all streams.  Also, check the
+  // physical format of the device and change that if necessary.
+  AudioStreamBasicDescription	description;
+  dataSize = sizeof( AudioStreamBasicDescription );
+  property.mSelector = kAudioStreamPropertyVirtualFormat;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream format for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Set the sample rate and data format id.  However, only make the
+  // change if the sample rate is not within 1.0 of the desired
+  // rate and the format is not linear pcm.
+  bool updateFormat = false;
+  if ( fabs( description.mSampleRate - (Float64)sampleRate ) > 1.0 ) {
+    description.mSampleRate = (Float64) sampleRate;
+    updateFormat = true;
+  }
+
+  if ( description.mFormatID != kAudioFormatLinearPCM ) {
+    description.mFormatID = kAudioFormatLinearPCM;
+    updateFormat = true;
+  }
+
+  if ( updateFormat ) {
+    result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &description );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate or data format for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // Now check the physical format.
+  property.mSelector = kAudioStreamPropertyPhysicalFormat;
+  result = AudioObjectGetPropertyData( id, &property, 0, NULL,  &dataSize, &description );
+  if ( result != noErr ) {
+    errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream physical format for device (" << device << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  //std::cout << "Current physical stream format:" << std::endl;
+  //std::cout << "   mBitsPerChan = " << description.mBitsPerChannel << std::endl;
+  //std::cout << "   aligned high = " << (description.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (description.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
+  //std::cout << "   bytesPerFrame = " << description.mBytesPerFrame << std::endl;
+  //std::cout << "   sample rate = " << description.mSampleRate << std::endl;
+
+  if ( description.mFormatID != kAudioFormatLinearPCM || description.mBitsPerChannel < 16 ) {
+    description.mFormatID = kAudioFormatLinearPCM;
+    //description.mSampleRate = (Float64) sampleRate;
+    AudioStreamBasicDescription	testDescription = description;
+    UInt32 formatFlags;
+
+    // We'll try higher bit rates first and then work our way down.
+    std::vector< std::pair<UInt32, UInt32>  > physicalFormats;
+    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsFloat) & ~kLinearPCMFormatFlagIsSignedInteger;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
+    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 24, formatFlags ) );   // 24-bit packed
+    formatFlags &= ~( kAudioFormatFlagIsPacked | kAudioFormatFlagIsAlignedHigh );
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 24.2, formatFlags ) ); // 24-bit in 4 bytes, aligned low
+    formatFlags |= kAudioFormatFlagIsAlignedHigh;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 24.4, formatFlags ) ); // 24-bit in 4 bytes, aligned high
+    formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 16, formatFlags ) );
+    physicalFormats.push_back( std::pair<Float32, UInt32>( 8, formatFlags ) );
+
+    bool setPhysicalFormat = false;
+    for( unsigned int i=0; i<physicalFormats.size(); i++ ) {
+      testDescription = description;
+      testDescription.mBitsPerChannel = (UInt32) physicalFormats[i].first;
+      testDescription.mFormatFlags = physicalFormats[i].second;
+      if ( (24 == (UInt32)physicalFormats[i].first) && ~( physicalFormats[i].second & kAudioFormatFlagIsPacked ) )
+        testDescription.mBytesPerFrame =  4 * testDescription.mChannelsPerFrame;
+      else
+        testDescription.mBytesPerFrame =  testDescription.mBitsPerChannel/8 * testDescription.mChannelsPerFrame;
+      testDescription.mBytesPerPacket = testDescription.mBytesPerFrame * testDescription.mFramesPerPacket;
+      result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &testDescription );
+      if ( result == noErr ) {
+        setPhysicalFormat = true;
+        //std::cout << "Updated physical stream format:" << std::endl;
+        //std::cout << "   mBitsPerChan = " << testDescription.mBitsPerChannel << std::endl;
+        //std::cout << "   aligned high = " << (testDescription.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (testDescription.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
+        //std::cout << "   bytesPerFrame = " << testDescription.mBytesPerFrame << std::endl;
+        //std::cout << "   sample rate = " << testDescription.mSampleRate << std::endl;
+        break;
+      }
+    }
+
+    if ( !setPhysicalFormat ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting physical data format for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  } // done setting virtual/physical formats.
+
+  // Get the stream / device latency.
+  UInt32 latency;
+  dataSize = sizeof( UInt32 );
+  property.mSelector = kAudioDevicePropertyLatency;
+  if ( AudioObjectHasProperty( id, &property ) == true ) {
+    result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &latency );
+    if ( result == kAudioHardwareNoError ) stream_.latency[ mode ] = latency;
+    else {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting device latency for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+    }
+  }
+
+  // Byte-swapping: According to AudioHardware.h, the stream data will
+  // always be presented in native-endian format, so we should never
+  // need to byte swap.
+  stream_.doByteSwap[mode] = false;
+
+  // From the CoreAudio documentation, PCM data must be supplied as
+  // 32-bit floats.
+  stream_.userFormat = format;
+  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+
+  if ( streamCount == 1 )
+    stream_.nDeviceChannels[mode] = description.mChannelsPerFrame;
+  else // multiple streams
+    stream_.nDeviceChannels[mode] = channels;
+  stream_.nUserChannels[mode] = channels;
+  stream_.channelOffset[mode] = channelOffset;  // offset within a CoreAudio stream
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+  stream_.deviceInterleaved[mode] = true;
+  if ( monoMode == true ) stream_.deviceInterleaved[mode] = false;
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( streamCount == 1 ) {
+    if ( stream_.nUserChannels[mode] > 1 &&
+         stream_.userInterleaved != stream_.deviceInterleaved[mode] )
+      stream_.doConvertBuffer[mode] = true;
+  }
+  else if ( monoMode && stream_.userInterleaved )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate our CoreHandle structure for the stream.
+  CoreHandle *handle = 0;
+  if ( stream_.apiHandle == 0 ) {
+    try {
+      handle = new CoreHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiCore::probeDeviceOpen: error allocating CoreHandle memory.";
+      goto error;
+    }
+
+    if ( pthread_cond_init( &handle->condition, NULL ) ) {
+      errorText_ = "RtApiCore::probeDeviceOpen: error initializing pthread condition variable.";
+      goto error;
+    }
+    stream_.apiHandle = (void *) handle;
+  }
+  else
+    handle = (CoreHandle *) stream_.apiHandle;
+  handle->iStream[mode] = firstStream;
+  handle->nStreams[mode] = streamCount;
+  handle->id[mode] = id;
+
+  // Allocate necessary internal buffers.
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  //  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  stream_.userBuffer[mode] = (char *) malloc( bufferBytes * sizeof(char) );
+  memset( stream_.userBuffer[mode], 0, bufferBytes * sizeof(char) );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiCore::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  // If possible, we will make use of the CoreAudio stream buffers as
+  // "device buffers".  However, we can't do this if using multiple
+  // streams.
+  if ( stream_.doConvertBuffer[mode] && handle->nStreams[mode] > 1 ) {
+
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( mode == INPUT ) {
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+        if ( bufferBytes <= bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiCore::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  stream_.sampleRate = sampleRate;
+  stream_.device[mode] = device;
+  stream_.state = STREAM_STOPPED;
+  stream_.callbackInfo.object = (void *) this;
+
+  // Setup the buffer conversion information structure.
+  if ( stream_.doConvertBuffer[mode] ) {
+    if ( streamCount > 1 ) setConvertInfo( mode, 0 );
+    else setConvertInfo( mode, channelOffset );
+  }
+
+  if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device )
+    // Only one callback procedure per device.
+    stream_.mode = DUPLEX;
+  else {
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+    result = AudioDeviceCreateIOProcID( id, callbackHandler, (void *) &stream_.callbackInfo, &handle->procId[mode] );
+#else
+    // deprecated in favor of AudioDeviceCreateIOProcID()
+    result = AudioDeviceAddIOProc( id, callbackHandler, (void *) &stream_.callbackInfo );
+#endif
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::probeDeviceOpen: system error setting callback for device (" << device << ").";
+      errorText_ = errorStream_.str();
+      goto error;
+    }
+    if ( stream_.mode == OUTPUT && mode == INPUT )
+      stream_.mode = DUPLEX;
+    else
+      stream_.mode = mode;
+  }
+
+  // Setup the device property listener for over/underload.
+  property.mSelector = kAudioDeviceProcessorOverload;
+  property.mScope = kAudioObjectPropertyScopeGlobal;
+  result = AudioObjectAddPropertyListener( id, &property, xrunListener, (void *) handle );
+
+  return SUCCESS;
+
+ error:
+  if ( handle ) {
+    pthread_cond_destroy( &handle->condition );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.state = STREAM_CLOSED;
+  return FAILURE;
+}
+
+void RtApiCore :: closeStream( void )
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiCore::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    if (handle) {
+      AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
+        kAudioObjectPropertyScopeGlobal,
+        kAudioObjectPropertyElementMaster };
+
+      property.mSelector = kAudioDeviceProcessorOverload;
+      property.mScope = kAudioObjectPropertyScopeGlobal;
+      if (AudioObjectRemovePropertyListener( handle->id[0], &property, xrunListener, (void *) handle ) != noErr) {
+        errorText_ = "RtApiCore::closeStream(): error removing property listener!";
+        error( RtAudioError::WARNING );
+      }
+    }
+    if ( stream_.state == STREAM_RUNNING )
+      AudioDeviceStop( handle->id[0], callbackHandler );
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+    AudioDeviceDestroyIOProcID( handle->id[0], handle->procId[0] );
+#else
+    // deprecated in favor of AudioDeviceDestroyIOProcID()
+    AudioDeviceRemoveIOProc( handle->id[0], callbackHandler );
+#endif
+  }
+
+  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
+    if (handle) {
+      AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
+        kAudioObjectPropertyScopeGlobal,
+        kAudioObjectPropertyElementMaster };
+
+      property.mSelector = kAudioDeviceProcessorOverload;
+      property.mScope = kAudioObjectPropertyScopeGlobal;
+      if (AudioObjectRemovePropertyListener( handle->id[1], &property, xrunListener, (void *) handle ) != noErr) {
+        errorText_ = "RtApiCore::closeStream(): error removing property listener!";
+        error( RtAudioError::WARNING );
+      }
+    }
+    if ( stream_.state == STREAM_RUNNING )
+      AudioDeviceStop( handle->id[1], callbackHandler );
+#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
+    AudioDeviceDestroyIOProcID( handle->id[1], handle->procId[1] );
+#else
+    // deprecated in favor of AudioDeviceDestroyIOProcID()
+    AudioDeviceRemoveIOProc( handle->id[1], callbackHandler );
+#endif
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  // Destroy pthread condition variable.
+  pthread_cond_destroy( &handle->condition );
+  delete handle;
+  stream_.apiHandle = 0;
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+void RtApiCore :: startStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiCore::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  OSStatus result = noErr;
+  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    result = AudioDeviceStart( handle->id[0], callbackHandler );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  if ( stream_.mode == INPUT ||
+       ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
+
+    result = AudioDeviceStart( handle->id[1], callbackHandler );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::startStream: system error starting input callback procedure on device (" << stream_.device[1] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  handle->drainCounter = 0;
+  handle->internalDrain = false;
+  stream_.state = STREAM_RUNNING;
+
+ unlock:
+  if ( result == noErr ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiCore :: stopStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiCore::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  OSStatus result = noErr;
+  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    if ( handle->drainCounter == 0 ) {
+      handle->drainCounter = 2;
+      pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled
+    }
+
+    result = AudioDeviceStop( handle->id[0], callbackHandler );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
+
+    result = AudioDeviceStop( handle->id[1], callbackHandler );
+    if ( result != noErr ) {
+      errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping input callback procedure on device (" << stream_.device[1] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  stream_.state = STREAM_STOPPED;
+
+ unlock:
+  if ( result == noErr ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiCore :: abortStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiCore::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+  handle->drainCounter = 2;
+
+  stopStream();
+}
+
+// This function will be called by a spawned thread when the user
+// callback function signals that the stream should be stopped or
+// aborted.  It is better to handle it this way because the
+// callbackEvent() function probably should return before the AudioDeviceStop()
+// function is called.
+static void *coreStopStream( void *ptr )
+{
+  CallbackInfo *info = (CallbackInfo *) ptr;
+  RtApiCore *object = (RtApiCore *) info->object;
+
+  object->stopStream();
+  pthread_exit( NULL );
+}
+
+bool RtApiCore :: callbackEvent( AudioDeviceID deviceId,
+                                 const AudioBufferList *inBufferList,
+                                 const AudioBufferList *outBufferList )
+{
+  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return FAILURE;
+  }
+
+  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+  CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
+
+  // Check if we were draining the stream and signal is finished.
+  if ( handle->drainCounter > 3 ) {
+    ThreadHandle threadId;
+
+    stream_.state = STREAM_STOPPING;
+    if ( handle->internalDrain == true )
+      pthread_create( &threadId, NULL, coreStopStream, info );
+    else // external call to stopStream()
+      pthread_cond_signal( &handle->condition );
+    return SUCCESS;
+  }
+
+  AudioDeviceID outputDevice = handle->id[0];
+
+  // Invoke user callback to get fresh output data UNLESS we are
+  // draining stream or duplex mode AND the input/output devices are
+  // different AND this function is called for the input device.
+  if ( handle->drainCounter == 0 && ( stream_.mode != DUPLEX || deviceId == outputDevice ) ) {
+    RtAudioCallback callback = (RtAudioCallback) info->callback;
+    double streamTime = getStreamTime();
+    RtAudioStreamStatus status = 0;
+    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
+      status |= RTAUDIO_OUTPUT_UNDERFLOW;
+      handle->xrun[0] = false;
+    }
+    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
+      status |= RTAUDIO_INPUT_OVERFLOW;
+      handle->xrun[1] = false;
+    }
+
+    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                                  stream_.bufferSize, streamTime, status, info->userData );
+    if ( cbReturnValue == 2 ) {
+      stream_.state = STREAM_STOPPING;
+      handle->drainCounter = 2;
+      abortStream();
+      return SUCCESS;
+    }
+    else if ( cbReturnValue == 1 ) {
+      handle->drainCounter = 1;
+      handle->internalDrain = true;
+    }
+  }
+
+  if ( stream_.mode == OUTPUT || ( stream_.mode == DUPLEX && deviceId == outputDevice ) ) {
+
+    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+
+      if ( handle->nStreams[0] == 1 ) {
+        memset( outBufferList->mBuffers[handle->iStream[0]].mData,
+                0,
+                outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
+      }
+      else { // fill multiple streams with zeros
+        for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
+          memset( outBufferList->mBuffers[handle->iStream[0]+i].mData,
+                  0,
+                  outBufferList->mBuffers[handle->iStream[0]+i].mDataByteSize );
+        }
+      }
+    }
+    else if ( handle->nStreams[0] == 1 ) {
+      if ( stream_.doConvertBuffer[0] ) { // convert directly to CoreAudio stream buffer
+        convertBuffer( (char *) outBufferList->mBuffers[handle->iStream[0]].mData,
+                       stream_.userBuffer[0], stream_.convertInfo[0] );
+      }
+      else { // copy from user buffer
+        memcpy( outBufferList->mBuffers[handle->iStream[0]].mData,
+                stream_.userBuffer[0],
+                outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
+      }
+    }
+    else { // fill multiple streams
+      Float32 *inBuffer = (Float32 *) stream_.userBuffer[0];
+      if ( stream_.doConvertBuffer[0] ) {
+        convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+        inBuffer = (Float32 *) stream_.deviceBuffer;
+      }
+
+      if ( stream_.deviceInterleaved[0] == false ) { // mono mode
+        UInt32 bufferBytes = outBufferList->mBuffers[handle->iStream[0]].mDataByteSize;
+        for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
+          memcpy( outBufferList->mBuffers[handle->iStream[0]+i].mData,
+                  (void *)&inBuffer[i*stream_.bufferSize], bufferBytes );
+        }
+      }
+      else { // fill multiple multi-channel streams with interleaved data
+        UInt32 streamChannels, channelsLeft, inJump, outJump, inOffset;
+        Float32 *out, *in;
+
+        bool inInterleaved = ( stream_.userInterleaved ) ? true : false;
+        UInt32 inChannels = stream_.nUserChannels[0];
+        if ( stream_.doConvertBuffer[0] ) {
+          inInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
+          inChannels = stream_.nDeviceChannels[0];
+        }
+
+        if ( inInterleaved ) inOffset = 1;
+        else inOffset = stream_.bufferSize;
+
+        channelsLeft = inChannels;
+        for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
+          in = inBuffer;
+          out = (Float32 *) outBufferList->mBuffers[handle->iStream[0]+i].mData;
+          streamChannels = outBufferList->mBuffers[handle->iStream[0]+i].mNumberChannels;
+
+          outJump = 0;
+          // Account for possible channel offset in first stream
+          if ( i == 0 && stream_.channelOffset[0] > 0 ) {
+            streamChannels -= stream_.channelOffset[0];
+            outJump = stream_.channelOffset[0];
+            out += outJump;
+          }
+
+          // Account for possible unfilled channels at end of the last stream
+          if ( streamChannels > channelsLeft ) {
+            outJump = streamChannels - channelsLeft;
+            streamChannels = channelsLeft;
+          }
+
+          // Determine input buffer offsets and skips
+          if ( inInterleaved ) {
+            inJump = inChannels;
+            in += inChannels - channelsLeft;
+          }
+          else {
+            inJump = 1;
+            in += (inChannels - channelsLeft) * inOffset;
+          }
+
+          for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
+            for ( unsigned int j=0; j<streamChannels; j++ ) {
+              *out++ = in[j*inOffset];
+            }
+            out += outJump;
+            in += inJump;
+          }
+          channelsLeft -= streamChannels;
+        }
+      }
+    }
+  }
+
+  // Don't bother draining input
+  if ( handle->drainCounter ) {
+    handle->drainCounter++;
+    goto unlock;
+  }
+
+  AudioDeviceID inputDevice;
+  inputDevice = handle->id[1];
+  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && deviceId == inputDevice ) ) {
+
+    if ( handle->nStreams[1] == 1 ) {
+      if ( stream_.doConvertBuffer[1] ) { // convert directly from CoreAudio stream buffer
+        convertBuffer( stream_.userBuffer[1],
+                       (char *) inBufferList->mBuffers[handle->iStream[1]].mData,
+                       stream_.convertInfo[1] );
+      }
+      else { // copy to user buffer
+        memcpy( stream_.userBuffer[1],
+                inBufferList->mBuffers[handle->iStream[1]].mData,
+                inBufferList->mBuffers[handle->iStream[1]].mDataByteSize );
+      }
+    }
+    else { // read from multiple streams
+      Float32 *outBuffer = (Float32 *) stream_.userBuffer[1];
+      if ( stream_.doConvertBuffer[1] ) outBuffer = (Float32 *) stream_.deviceBuffer;
+
+      if ( stream_.deviceInterleaved[1] == false ) { // mono mode
+        UInt32 bufferBytes = inBufferList->mBuffers[handle->iStream[1]].mDataByteSize;
+        for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
+          memcpy( (void *)&outBuffer[i*stream_.bufferSize],
+                  inBufferList->mBuffers[handle->iStream[1]+i].mData, bufferBytes );
+        }
+      }
+      else { // read from multiple multi-channel streams
+        UInt32 streamChannels, channelsLeft, inJump, outJump, outOffset;
+        Float32 *out, *in;
+
+        bool outInterleaved = ( stream_.userInterleaved ) ? true : false;
+        UInt32 outChannels = stream_.nUserChannels[1];
+        if ( stream_.doConvertBuffer[1] ) {
+          outInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
+          outChannels = stream_.nDeviceChannels[1];
+        }
+
+        if ( outInterleaved ) outOffset = 1;
+        else outOffset = stream_.bufferSize;
+
+        channelsLeft = outChannels;
+        for ( unsigned int i=0; i<handle->nStreams[1]; i++ ) {
+          out = outBuffer;
+          in = (Float32 *) inBufferList->mBuffers[handle->iStream[1]+i].mData;
+          streamChannels = inBufferList->mBuffers[handle->iStream[1]+i].mNumberChannels;
+
+          inJump = 0;
+          // Account for possible channel offset in first stream
+          if ( i == 0 && stream_.channelOffset[1] > 0 ) {
+            streamChannels -= stream_.channelOffset[1];
+            inJump = stream_.channelOffset[1];
+            in += inJump;
+          }
+
+          // Account for possible unread channels at end of the last stream
+          if ( streamChannels > channelsLeft ) {
+            inJump = streamChannels - channelsLeft;
+            streamChannels = channelsLeft;
+          }
+
+          // Determine output buffer offsets and skips
+          if ( outInterleaved ) {
+            outJump = outChannels;
+            out += outChannels - channelsLeft;
+          }
+          else {
+            outJump = 1;
+            out += (outChannels - channelsLeft) * outOffset;
+          }
+
+          for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
+            for ( unsigned int j=0; j<streamChannels; j++ ) {
+              out[j*outOffset] = *in++;
+            }
+            out += outJump;
+            in += inJump;
+          }
+          channelsLeft -= streamChannels;
+        }
+      }
+      
+      if ( stream_.doConvertBuffer[1] ) { // convert from our internal "device" buffer
+        convertBuffer( stream_.userBuffer[1],
+                       stream_.deviceBuffer,
+                       stream_.convertInfo[1] );
+      }
+    }
+  }
+
+ unlock:
+  //MUTEX_UNLOCK( &stream_.mutex );
+
+  RtApi::tickStreamTime();
+  return SUCCESS;
+}
+
+const char* RtApiCore :: getErrorCode( OSStatus code )
+{
+  switch( code ) {
+
+  case kAudioHardwareNotRunningError:
+    return "kAudioHardwareNotRunningError";
+
+  case kAudioHardwareUnspecifiedError:
+    return "kAudioHardwareUnspecifiedError";
+
+  case kAudioHardwareUnknownPropertyError:
+    return "kAudioHardwareUnknownPropertyError";
+
+  case kAudioHardwareBadPropertySizeError:
+    return "kAudioHardwareBadPropertySizeError";
+
+  case kAudioHardwareIllegalOperationError:
+    return "kAudioHardwareIllegalOperationError";
+
+  case kAudioHardwareBadObjectError:
+    return "kAudioHardwareBadObjectError";
+
+  case kAudioHardwareBadDeviceError:
+    return "kAudioHardwareBadDeviceError";
+
+  case kAudioHardwareBadStreamError:
+    return "kAudioHardwareBadStreamError";
+
+  case kAudioHardwareUnsupportedOperationError:
+    return "kAudioHardwareUnsupportedOperationError";
+
+  case kAudioDeviceUnsupportedFormatError:
+    return "kAudioDeviceUnsupportedFormatError";
+
+  case kAudioDevicePermissionsError:
+    return "kAudioDevicePermissionsError";
+
+  default:
+    return "CoreAudio unknown error";
+  }
+}
+
+  //******************** End of __MACOSX_CORE__ *********************//
+#endif
+
+#if defined(__UNIX_JACK__)
+
+// JACK is a low-latency audio server, originally written for the
+// GNU/Linux operating system and now also ported to OS-X. It can
+// connect a number of different applications to an audio device, as
+// well as allowing them to share audio between themselves.
+//
+// When using JACK with RtAudio, "devices" refer to JACK clients that
+// have ports connected to the server.  The JACK server is typically
+// started in a terminal as follows:
+//
+// .jackd -d alsa -d hw:0
+//
+// or through an interface program such as qjackctl.  Many of the
+// parameters normally set for a stream are fixed by the JACK server
+// and can be specified when the JACK server is started.  In
+// particular,
+//
+// .jackd -d alsa -d hw:0 -r 44100 -p 512 -n 4
+//
+// specifies a sample rate of 44100 Hz, a buffer size of 512 sample
+// frames, and number of buffers = 4.  Once the server is running, it
+// is not possible to override these values.  If the values are not
+// specified in the command-line, the JACK server uses default values.
+//
+// The JACK server does not have to be running when an instance of
+// RtApiJack is created, though the function getDeviceCount() will
+// report 0 devices found until JACK has been started.  When no
+// devices are available (i.e., the JACK server is not running), a
+// stream cannot be opened.
+
+#include <jack/jack.h>
+#include <unistd.h>
+#include <cstdio>
+
+// A structure to hold various information related to the Jack API
+// implementation.
+struct JackHandle {
+  jack_client_t *client;
+  jack_port_t **ports[2];
+  std::string deviceName[2];
+  bool xrun[2];
+  pthread_cond_t condition;
+  int drainCounter;       // Tracks callback counts when draining
+  bool internalDrain;     // Indicates if stop is initiated from callback or not.
+
+  JackHandle()
+    :client(0), drainCounter(0), internalDrain(false) { ports[0] = 0; ports[1] = 0; xrun[0] = false; xrun[1] = false; }
+};
+
+static void jackSilentError( const char * ) {};
+
+RtApiJack :: RtApiJack()
+{
+  // Nothing to do here.
+#if !defined(__RTAUDIO_DEBUG__)
+  // Turn off Jack's internal error reporting.
+  jack_set_error_function( &jackSilentError );
+#endif
+}
+
+RtApiJack :: ~RtApiJack()
+{
+  if ( stream_.state != STREAM_CLOSED ) closeStream();
+}
+
+unsigned int RtApiJack :: getDeviceCount( void )
+{
+  // See if we can become a jack client.
+  jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
+  jack_status_t *status = NULL;
+  jack_client_t *client = jack_client_open( "RtApiJackCount", options, status );
+  if ( client == 0 ) return 0;
+
+  const char **ports;
+  std::string port, previousPort;
+  unsigned int nChannels = 0, nDevices = 0;
+  ports = jack_get_ports( client, NULL, NULL, 0 );
+  if ( ports ) {
+    // Parse the port names up to the first colon (:).
+    size_t iColon = 0;
+    do {
+      port = (char *) ports[ nChannels ];
+      iColon = port.find(":");
+      if ( iColon != std::string::npos ) {
+        port = port.substr( 0, iColon + 1 );
+        if ( port != previousPort ) {
+          nDevices++;
+          previousPort = port;
+        }
+      }
+    } while ( ports[++nChannels] );
+    free( ports );
+  }
+
+  jack_client_close( client );
+  return nDevices;
+}
+
+RtAudio::DeviceInfo RtApiJack :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption
+  jack_status_t *status = NULL;
+  jack_client_t *client = jack_client_open( "RtApiJackInfo", options, status );
+  if ( client == 0 ) {
+    errorText_ = "RtApiJack::getDeviceInfo: Jack server not found or connection error!";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  const char **ports;
+  std::string port, previousPort;
+  unsigned int nPorts = 0, nDevices = 0;
+  ports = jack_get_ports( client, NULL, NULL, 0 );
+  if ( ports ) {
+    // Parse the port names up to the first colon (:).
+    size_t iColon = 0;
+    do {
+      port = (char *) ports[ nPorts ];
+      iColon = port.find(":");
+      if ( iColon != std::string::npos ) {
+        port = port.substr( 0, iColon );
+        if ( port != previousPort ) {
+          if ( nDevices == device ) info.name = port;
+          nDevices++;
+          previousPort = port;
+        }
+      }
+    } while ( ports[++nPorts] );
+    free( ports );
+  }
+
+  if ( device >= nDevices ) {
+    jack_client_close( client );
+    errorText_ = "RtApiJack::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  // Get the current jack server sample rate.
+  info.sampleRates.clear();
+
+  info.preferredSampleRate = jack_get_sample_rate( client );
+  info.sampleRates.push_back( info.preferredSampleRate );
+
+  // Count the available ports containing the client name as device
+  // channels.  Jack "input ports" equal RtAudio output channels.
+  unsigned int nChannels = 0;
+  ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsInput );
+  if ( ports ) {
+    while ( ports[ nChannels ] ) nChannels++;
+    free( ports );
+    info.outputChannels = nChannels;
+  }
+
+  // Jack "output ports" equal RtAudio input channels.
+  nChannels = 0;
+  ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsOutput );
+  if ( ports ) {
+    while ( ports[ nChannels ] ) nChannels++;
+    free( ports );
+    info.inputChannels = nChannels;
+  }
+
+  if ( info.outputChannels == 0 && info.inputChannels == 0 ) {
+    jack_client_close(client);
+    errorText_ = "RtApiJack::getDeviceInfo: error determining Jack input/output channels!";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // If device opens for both playback and capture, we determine the channels.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  // Jack always uses 32-bit floats.
+  info.nativeFormats = RTAUDIO_FLOAT32;
+
+  // Jack doesn't provide default devices so we'll use the first available one.
+  if ( device == 0 && info.outputChannels > 0 )
+    info.isDefaultOutput = true;
+  if ( device == 0 && info.inputChannels > 0 )
+    info.isDefaultInput = true;
+
+  jack_client_close(client);
+  info.probed = true;
+  return info;
+}
+
+static int jackCallbackHandler( jack_nframes_t nframes, void *infoPointer )
+{
+  CallbackInfo *info = (CallbackInfo *) infoPointer;
+
+  RtApiJack *object = (RtApiJack *) info->object;
+  if ( object->callbackEvent( (unsigned long) nframes ) == false ) return 1;
+
+  return 0;
+}
+
+// This function will be called by a spawned thread when the Jack
+// server signals that it is shutting down.  It is necessary to handle
+// it this way because the jackShutdown() function must return before
+// the jack_deactivate() function (in closeStream()) will return.
+static void *jackCloseStream( void *ptr )
+{
+  CallbackInfo *info = (CallbackInfo *) ptr;
+  RtApiJack *object = (RtApiJack *) info->object;
+
+  object->closeStream();
+
+  pthread_exit( NULL );
+}
+static void jackShutdown( void *infoPointer )
+{
+  CallbackInfo *info = (CallbackInfo *) infoPointer;
+  RtApiJack *object = (RtApiJack *) info->object;
+
+  // Check current stream state.  If stopped, then we'll assume this
+  // was called as a result of a call to RtApiJack::stopStream (the
+  // deactivation of a client handle causes this function to be called).
+  // If not, we'll assume the Jack server is shutting down or some
+  // other problem occurred and we should close the stream.
+  if ( object->isStreamRunning() == false ) return;
+
+  ThreadHandle threadId;
+  pthread_create( &threadId, NULL, jackCloseStream, info );
+  std::cerr << "\nRtApiJack: the Jack server is shutting down this client ... stream stopped and closed!!\n" << std::endl;
+}
+
+static int jackXrun( void *infoPointer )
+{
+  JackHandle *handle = (JackHandle *) infoPointer;
+
+  if ( handle->ports[0] ) handle->xrun[0] = true;
+  if ( handle->ports[1] ) handle->xrun[1] = true;
+
+  return 0;
+}
+
+bool RtApiJack :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int *bufferSize,
+                                   RtAudio::StreamOptions *options )
+{
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+
+  // Look for jack server and try to become a client (only do once per stream).
+  jack_client_t *client = 0;
+  if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) {
+    jack_options_t jackoptions = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
+    jack_status_t *status = NULL;
+    if ( options && !options->streamName.empty() )
+      client = jack_client_open( options->streamName.c_str(), jackoptions, status );
+    else
+      client = jack_client_open( "RtApiJack", jackoptions, status );
+    if ( client == 0 ) {
+      errorText_ = "RtApiJack::probeDeviceOpen: Jack server not found or connection error!";
+      error( RtAudioError::WARNING );
+      return FAILURE;
+    }
+  }
+  else {
+    // The handle must have been created on an earlier pass.
+    client = handle->client;
+  }
+
+  const char **ports;
+  std::string port, previousPort, deviceName;
+  unsigned int nPorts = 0, nDevices = 0;
+  ports = jack_get_ports( client, NULL, NULL, 0 );
+  if ( ports ) {
+    // Parse the port names up to the first colon (:).
+    size_t iColon = 0;
+    do {
+      port = (char *) ports[ nPorts ];
+      iColon = port.find(":");
+      if ( iColon != std::string::npos ) {
+        port = port.substr( 0, iColon );
+        if ( port != previousPort ) {
+          if ( nDevices == device ) deviceName = port;
+          nDevices++;
+          previousPort = port;
+        }
+      }
+    } while ( ports[++nPorts] );
+    free( ports );
+  }
+
+  if ( device >= nDevices ) {
+    errorText_ = "RtApiJack::probeDeviceOpen: device ID is invalid!";
+    return FAILURE;
+  }
+
+  // Count the available ports containing the client name as device
+  // channels.  Jack "input ports" equal RtAudio output channels.
+  unsigned int nChannels = 0;
+  unsigned long flag = JackPortIsInput;
+  if ( mode == INPUT ) flag = JackPortIsOutput;
+  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
+  if ( ports ) {
+    while ( ports[ nChannels ] ) nChannels++;
+    free( ports );
+  }
+
+  // Compare the jack ports for specified client to the requested number of channels.
+  if ( nChannels < (channels + firstChannel) ) {
+    errorStream_ << "RtApiJack::probeDeviceOpen: requested number of channels (" << channels << ") + offset (" << firstChannel << ") not found for specified device (" << device << ":" << deviceName << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Check the jack server sample rate.
+  unsigned int jackRate = jack_get_sample_rate( client );
+  if ( sampleRate != jackRate ) {
+    jack_client_close( client );
+    errorStream_ << "RtApiJack::probeDeviceOpen: the requested sample rate (" << sampleRate << ") is different than the JACK server rate (" << jackRate << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+  stream_.sampleRate = jackRate;
+
+  // Get the latency of the JACK port.
+  ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
+  if ( ports[ firstChannel ] ) {
+    // Added by Ge Wang
+    jack_latency_callback_mode_t cbmode = (mode == INPUT ? JackCaptureLatency : JackPlaybackLatency);
+    // the range (usually the min and max are equal)
+    jack_latency_range_t latrange; latrange.min = latrange.max = 0;
+    // get the latency range
+    jack_port_get_latency_range( jack_port_by_name( client, ports[firstChannel] ), cbmode, &latrange );
+    // be optimistic, use the min!
+    stream_.latency[mode] = latrange.min;
+    //stream_.latency[mode] = jack_port_get_latency( jack_port_by_name( client, ports[ firstChannel ] ) );
+  }
+  free( ports );
+
+  // The jack server always uses 32-bit floating-point data.
+  stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+  stream_.userFormat = format;
+
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+
+  // Jack always uses non-interleaved buffers.
+  stream_.deviceInterleaved[mode] = false;
+
+  // Jack always provides host byte-ordered data.
+  stream_.doByteSwap[mode] = false;
+
+  // Get the buffer size.  The buffer size and number of buffers
+  // (periods) is set when the jack server is started.
+  stream_.bufferSize = (int) jack_get_buffer_size( client );
+  *bufferSize = stream_.bufferSize;
+
+  stream_.nDeviceChannels[mode] = channels;
+  stream_.nUserChannels[mode] = channels;
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+       stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate our JackHandle structure for the stream.
+  if ( handle == 0 ) {
+    try {
+      handle = new JackHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiJack::probeDeviceOpen: error allocating JackHandle memory.";
+      goto error;
+    }
+
+    if ( pthread_cond_init(&handle->condition, NULL) ) {
+      errorText_ = "RtApiJack::probeDeviceOpen: error initializing pthread condition variable.";
+      goto error;
+    }
+    stream_.apiHandle = (void *) handle;
+    handle->client = client;
+  }
+  handle->deviceName[mode] = deviceName;
+
+  // Allocate necessary internal buffers.
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiJack::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    bool makeBuffer = true;
+    if ( mode == OUTPUT )
+      bufferBytes = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+    else { // mode == INPUT
+      bufferBytes = stream_.nDeviceChannels[1] * formatBytes( stream_.deviceFormat[1] );
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes(stream_.deviceFormat[0]);
+        if ( bufferBytes < bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiJack::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  // Allocate memory for the Jack ports (channels) identifiers.
+  handle->ports[mode] = (jack_port_t **) malloc ( sizeof (jack_port_t *) * channels );
+  if ( handle->ports[mode] == NULL )  {
+    errorText_ = "RtApiJack::probeDeviceOpen: error allocating port memory.";
+    goto error;
+  }
+
+  stream_.device[mode] = device;
+  stream_.channelOffset[mode] = firstChannel;
+  stream_.state = STREAM_STOPPED;
+  stream_.callbackInfo.object = (void *) this;
+
+  if ( stream_.mode == OUTPUT && mode == INPUT )
+    // We had already set up the stream for output.
+    stream_.mode = DUPLEX;
+  else {
+    stream_.mode = mode;
+    jack_set_process_callback( handle->client, jackCallbackHandler, (void *) &stream_.callbackInfo );
+    jack_set_xrun_callback( handle->client, jackXrun, (void *) &handle );
+    jack_on_shutdown( handle->client, jackShutdown, (void *) &stream_.callbackInfo );
+  }
+
+  // Register our ports.
+  char label[64];
+  if ( mode == OUTPUT ) {
+    for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
+      snprintf( label, 64, "outport %d", i );
+      handle->ports[0][i] = jack_port_register( handle->client, (const char *)label,
+                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0 );
+    }
+  }
+  else {
+    for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
+      snprintf( label, 64, "inport %d", i );
+      handle->ports[1][i] = jack_port_register( handle->client, (const char *)label,
+                                                JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0 );
+    }
+  }
+
+  // Setup the buffer conversion information structure.  We don't use
+  // buffers to do channel offsets, so we override that parameter
+  // here.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
+
+  return SUCCESS;
+
+ error:
+  if ( handle ) {
+    pthread_cond_destroy( &handle->condition );
+    jack_client_close( handle->client );
+
+    if ( handle->ports[0] ) free( handle->ports[0] );
+    if ( handle->ports[1] ) free( handle->ports[1] );
+
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  return FAILURE;
+}
+
+void RtApiJack :: closeStream( void )
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiJack::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+  if ( handle ) {
+
+    if ( stream_.state == STREAM_RUNNING )
+      jack_deactivate( handle->client );
+
+    jack_client_close( handle->client );
+  }
+
+  if ( handle ) {
+    if ( handle->ports[0] ) free( handle->ports[0] );
+    if ( handle->ports[1] ) free( handle->ports[1] );
+    pthread_cond_destroy( &handle->condition );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+void RtApiJack :: startStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiJack::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+  int result = jack_activate( handle->client );
+  if ( result ) {
+    errorText_ = "RtApiJack::startStream(): unable to activate JACK client!";
+    goto unlock;
+  }
+
+  const char **ports;
+
+  // Get the list of available ports.
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    result = 1;
+    ports = jack_get_ports( handle->client, handle->deviceName[0].c_str(), NULL, JackPortIsInput);
+    if ( ports == NULL) {
+      errorText_ = "RtApiJack::startStream(): error determining available JACK input ports!";
+      goto unlock;
+    }
+
+    // Now make the port connections.  Since RtAudio wasn't designed to
+    // allow the user to select particular channels of a device, we'll
+    // just open the first "nChannels" ports with offset.
+    for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
+      result = 1;
+      if ( ports[ stream_.channelOffset[0] + i ] )
+        result = jack_connect( handle->client, jack_port_name( handle->ports[0][i] ), ports[ stream_.channelOffset[0] + i ] );
+      if ( result ) {
+        free( ports );
+        errorText_ = "RtApiJack::startStream(): error connecting output ports!";
+        goto unlock;
+      }
+    }
+    free(ports);
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+    result = 1;
+    ports = jack_get_ports( handle->client, handle->deviceName[1].c_str(), NULL, JackPortIsOutput );
+    if ( ports == NULL) {
+      errorText_ = "RtApiJack::startStream(): error determining available JACK output ports!";
+      goto unlock;
+    }
+
+    // Now make the port connections.  See note above.
+    for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
+      result = 1;
+      if ( ports[ stream_.channelOffset[1] + i ] )
+        result = jack_connect( handle->client, ports[ stream_.channelOffset[1] + i ], jack_port_name( handle->ports[1][i] ) );
+      if ( result ) {
+        free( ports );
+        errorText_ = "RtApiJack::startStream(): error connecting input ports!";
+        goto unlock;
+      }
+    }
+    free(ports);
+  }
+
+  handle->drainCounter = 0;
+  handle->internalDrain = false;
+  stream_.state = STREAM_RUNNING;
+
+ unlock:
+  if ( result == 0 ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiJack :: stopStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiJack::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    if ( handle->drainCounter == 0 ) {
+      handle->drainCounter = 2;
+      pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled
+    }
+  }
+
+  jack_deactivate( handle->client );
+  stream_.state = STREAM_STOPPED;
+}
+
+void RtApiJack :: abortStream( void )
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiJack::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+  handle->drainCounter = 2;
+
+  stopStream();
+}
+
+// This function will be called by a spawned thread when the user
+// callback function signals that the stream should be stopped or
+// aborted.  It is necessary to handle it this way because the
+// callbackEvent() function must return before the jack_deactivate()
+// function will return.
+static void *jackStopStream( void *ptr )
+{
+  CallbackInfo *info = (CallbackInfo *) ptr;
+  RtApiJack *object = (RtApiJack *) info->object;
+
+  object->stopStream();
+  pthread_exit( NULL );
+}
+
+bool RtApiJack :: callbackEvent( unsigned long nframes )
+{
+  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return FAILURE;
+  }
+  if ( stream_.bufferSize != nframes ) {
+    errorText_ = "RtApiCore::callbackEvent(): the JACK buffer size has changed ... cannot process!";
+    error( RtAudioError::WARNING );
+    return FAILURE;
+  }
+
+  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+  JackHandle *handle = (JackHandle *) stream_.apiHandle;
+
+  // Check if we were draining the stream and signal is finished.
+  if ( handle->drainCounter > 3 ) {
+    ThreadHandle threadId;
+
+    stream_.state = STREAM_STOPPING;
+    if ( handle->internalDrain == true )
+      pthread_create( &threadId, NULL, jackStopStream, info );
+    else
+      pthread_cond_signal( &handle->condition );
+    return SUCCESS;
+  }
+
+  // Invoke user callback first, to get fresh output data.
+  if ( handle->drainCounter == 0 ) {
+    RtAudioCallback callback = (RtAudioCallback) info->callback;
+    double streamTime = getStreamTime();
+    RtAudioStreamStatus status = 0;
+    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
+      status |= RTAUDIO_OUTPUT_UNDERFLOW;
+      handle->xrun[0] = false;
+    }
+    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
+      status |= RTAUDIO_INPUT_OVERFLOW;
+      handle->xrun[1] = false;
+    }
+    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                                  stream_.bufferSize, streamTime, status, info->userData );
+    if ( cbReturnValue == 2 ) {
+      stream_.state = STREAM_STOPPING;
+      handle->drainCounter = 2;
+      ThreadHandle id;
+      pthread_create( &id, NULL, jackStopStream, info );
+      return SUCCESS;
+    }
+    else if ( cbReturnValue == 1 ) {
+      handle->drainCounter = 1;
+      handle->internalDrain = true;
+    }
+  }
+
+  jack_default_audio_sample_t *jackbuffer;
+  unsigned long bufferBytes = nframes * sizeof( jack_default_audio_sample_t );
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+
+      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
+        memset( jackbuffer, 0, bufferBytes );
+      }
+
+    }
+    else if ( stream_.doConvertBuffer[0] ) {
+
+      convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+
+      for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
+        memcpy( jackbuffer, &stream_.deviceBuffer[i*bufferBytes], bufferBytes );
+      }
+    }
+    else { // no buffer conversion
+      for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
+        memcpy( jackbuffer, &stream_.userBuffer[0][i*bufferBytes], bufferBytes );
+      }
+    }
+  }
+
+  // Don't bother draining input
+  if ( handle->drainCounter ) {
+    handle->drainCounter++;
+    goto unlock;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    if ( stream_.doConvertBuffer[1] ) {
+      for ( unsigned int i=0; i<stream_.nDeviceChannels[1]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
+        memcpy( &stream_.deviceBuffer[i*bufferBytes], jackbuffer, bufferBytes );
+      }
+      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+    }
+    else { // no buffer conversion
+      for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
+        jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
+        memcpy( &stream_.userBuffer[1][i*bufferBytes], jackbuffer, bufferBytes );
+      }
+    }
+  }
+
+ unlock:
+  RtApi::tickStreamTime();
+  return SUCCESS;
+}
+  //******************** End of __UNIX_JACK__ *********************//
+#endif
+
+#if defined(__WINDOWS_ASIO__) // ASIO API on Windows
+
+// The ASIO API is designed around a callback scheme, so this
+// implementation is similar to that used for OS-X CoreAudio and Linux
+// Jack.  The primary constraint with ASIO is that it only allows
+// access to a single driver at a time.  Thus, it is not possible to
+// have more than one simultaneous RtAudio stream.
+//
+// This implementation also requires a number of external ASIO files
+// and a few global variables.  The ASIO callback scheme does not
+// allow for the passing of user data, so we must create a global
+// pointer to our callbackInfo structure.
+//
+// On unix systems, we make use of a pthread condition variable.
+// Since there is no equivalent in Windows, I hacked something based
+// on information found in
+// http://www.cs.wustl.edu/~schmidt/win32-cv-1.html.
+
+#include "asiosys.h"
+#include "asio.h"
+#include "iasiothiscallresolver.h"
+#include "asiodrivers.h"
+#include <cmath>
+
+static AsioDrivers drivers;
+static ASIOCallbacks asioCallbacks;
+static ASIODriverInfo driverInfo;
+static CallbackInfo *asioCallbackInfo;
+static bool asioXRun;
+
+struct AsioHandle {
+  int drainCounter;       // Tracks callback counts when draining
+  bool internalDrain;     // Indicates if stop is initiated from callback or not.
+  ASIOBufferInfo *bufferInfos;
+  HANDLE condition;
+
+  AsioHandle()
+    :drainCounter(0), internalDrain(false), bufferInfos(0) {}
+};
+
+// Function declarations (definitions at end of section)
+static const char* getAsioErrorString( ASIOError result );
+static void sampleRateChanged( ASIOSampleRate sRate );
+static long asioMessages( long selector, long value, void* message, double* opt );
+
+RtApiAsio :: RtApiAsio()
+{
+  // ASIO cannot run on a multi-threaded appartment. You can call
+  // CoInitialize beforehand, but it must be for appartment threading
+  // (in which case, CoInitilialize will return S_FALSE here).
+  coInitialized_ = false;
+  HRESULT hr = CoInitialize( NULL ); 
+  if ( FAILED(hr) ) {
+    errorText_ = "RtApiAsio::ASIO requires a single-threaded appartment. Call CoInitializeEx(0,COINIT_APARTMENTTHREADED)";
+    error( RtAudioError::WARNING );
+  }
+  coInitialized_ = true;
+
+  drivers.removeCurrentDriver();
+  driverInfo.asioVersion = 2;
+
+  // See note in DirectSound implementation about GetDesktopWindow().
+  driverInfo.sysRef = GetForegroundWindow();
+}
+
+RtApiAsio :: ~RtApiAsio()
+{
+  if ( stream_.state != STREAM_CLOSED ) closeStream();
+  if ( coInitialized_ ) CoUninitialize();
+}
+
+unsigned int RtApiAsio :: getDeviceCount( void )
+{
+  return (unsigned int) drivers.asioGetNumDev();
+}
+
+RtAudio::DeviceInfo RtApiAsio :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  // Get device ID
+  unsigned int nDevices = getDeviceCount();
+  if ( nDevices == 0 ) {
+    errorText_ = "RtApiAsio::getDeviceInfo: no devices found!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  if ( device >= nDevices ) {
+    errorText_ = "RtApiAsio::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  // If a stream is already open, we cannot probe other devices.  Thus, use the saved results.
+  if ( stream_.state != STREAM_CLOSED ) {
+    if ( device >= devices_.size() ) {
+      errorText_ = "RtApiAsio::getDeviceInfo: device ID was not present before stream was opened.";
+      error( RtAudioError::WARNING );
+      return info;
+    }
+    return devices_[ device ];
+  }
+
+  char driverName[32];
+  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::getDeviceInfo: unable to get driver name (" << getAsioErrorString( result ) << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  info.name = driverName;
+
+  if ( !drivers.loadDriver( driverName ) ) {
+    errorStream_ << "RtApiAsio::getDeviceInfo: unable to load driver (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  result = ASIOInit( &driverInfo );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Determine the device channel information.
+  long inputChannels, outputChannels;
+  result = ASIOGetChannels( &inputChannels, &outputChannels );
+  if ( result != ASE_OK ) {
+    drivers.removeCurrentDriver();
+    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  info.outputChannels = outputChannels;
+  info.inputChannels = inputChannels;
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  // Determine the supported sample rates.
+  info.sampleRates.clear();
+  for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
+    result = ASIOCanSampleRate( (ASIOSampleRate) SAMPLE_RATES[i] );
+    if ( result == ASE_OK ) {
+      info.sampleRates.push_back( SAMPLE_RATES[i] );
+
+      if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
+        info.preferredSampleRate = SAMPLE_RATES[i];
+    }
+  }
+
+  // Determine supported data types ... just check first channel and assume rest are the same.
+  ASIOChannelInfo channelInfo;
+  channelInfo.channel = 0;
+  channelInfo.isInput = true;
+  if ( info.inputChannels <= 0 ) channelInfo.isInput = false;
+  result = ASIOGetChannelInfo( &channelInfo );
+  if ( result != ASE_OK ) {
+    drivers.removeCurrentDriver();
+    errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting driver channel info (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  info.nativeFormats = 0;
+  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB )
+    info.nativeFormats |= RTAUDIO_SINT16;
+  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB )
+    info.nativeFormats |= RTAUDIO_SINT32;
+  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB )
+    info.nativeFormats |= RTAUDIO_FLOAT32;
+  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB )
+    info.nativeFormats |= RTAUDIO_FLOAT64;
+  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB )
+    info.nativeFormats |= RTAUDIO_SINT24;
+
+  if ( info.outputChannels > 0 )
+    if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
+  if ( info.inputChannels > 0 )
+    if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
+
+  info.probed = true;
+  drivers.removeCurrentDriver();
+  return info;
+}
+
+static void bufferSwitch( long index, ASIOBool /*processNow*/ )
+{
+  RtApiAsio *object = (RtApiAsio *) asioCallbackInfo->object;
+  object->callbackEvent( index );
+}
+
+void RtApiAsio :: saveDeviceInfo( void )
+{
+  devices_.clear();
+
+  unsigned int nDevices = getDeviceCount();
+  devices_.resize( nDevices );
+  for ( unsigned int i=0; i<nDevices; i++ )
+    devices_[i] = getDeviceInfo( i );
+}
+
+bool RtApiAsio :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int *bufferSize,
+                                   RtAudio::StreamOptions *options )
+{////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+  bool isDuplexInput =  mode == INPUT && stream_.mode == OUTPUT;
+
+  // For ASIO, a duplex stream MUST use the same driver.
+  if ( isDuplexInput && stream_.device[0] != device ) {
+    errorText_ = "RtApiAsio::probeDeviceOpen: an ASIO duplex stream must use the same device for input and output!";
+    return FAILURE;
+  }
+
+  char driverName[32];
+  ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: unable to get driver name (" << getAsioErrorString( result ) << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Only load the driver once for duplex stream.
+  if ( !isDuplexInput ) {
+    // The getDeviceInfo() function will not work when a stream is open
+    // because ASIO does not allow multiple devices to run at the same
+    // time.  Thus, we'll probe the system before opening a stream and
+    // save the results for use by getDeviceInfo().
+    this->saveDeviceInfo();
+
+    if ( !drivers.loadDriver( driverName ) ) {
+      errorStream_ << "RtApiAsio::probeDeviceOpen: unable to load driver (" << driverName << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    result = ASIOInit( &driverInfo );
+    if ( result != ASE_OK ) {
+      errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // keep them before any "goto error", they are used for error cleanup + goto device boundary checks
+  bool buffersAllocated = false;
+  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+  unsigned int nChannels;
+
+
+  // Check the device channel count.
+  long inputChannels, outputChannels;
+  result = ASIOGetChannels( &inputChannels, &outputChannels );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  if ( ( mode == OUTPUT && (channels+firstChannel) > (unsigned int) outputChannels) ||
+       ( mode == INPUT && (channels+firstChannel) > (unsigned int) inputChannels) ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested channel count (" << channels << ") + offset (" << firstChannel << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+  stream_.nDeviceChannels[mode] = channels;
+  stream_.nUserChannels[mode] = channels;
+  stream_.channelOffset[mode] = firstChannel;
+
+  // Verify the sample rate is supported.
+  result = ASIOCanSampleRate( (ASIOSampleRate) sampleRate );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested sample rate (" << sampleRate << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Get the current sample rate
+  ASIOSampleRate currentRate;
+  result = ASIOGetSampleRate( &currentRate );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error getting sample rate.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Set the sample rate only if necessary
+  if ( currentRate != sampleRate ) {
+    result = ASIOSetSampleRate( (ASIOSampleRate) sampleRate );
+    if ( result != ASE_OK ) {
+      errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error setting sample rate (" << sampleRate << ").";
+      errorText_ = errorStream_.str();
+      goto error;
+    }
+  }
+
+  // Determine the driver data type.
+  ASIOChannelInfo channelInfo;
+  channelInfo.channel = 0;
+  if ( mode == OUTPUT ) channelInfo.isInput = false;
+  else channelInfo.isInput = true;
+  result = ASIOGetChannelInfo( &channelInfo );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting data format.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Assuming WINDOWS host is always little-endian.
+  stream_.doByteSwap[mode] = false;
+  stream_.userFormat = format;
+  stream_.deviceFormat[mode] = 0;
+  if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+    if ( channelInfo.type == ASIOSTInt16MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+    if ( channelInfo.type == ASIOSTInt32MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+    if ( channelInfo.type == ASIOSTFloat32MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
+    if ( channelInfo.type == ASIOSTFloat64MSB ) stream_.doByteSwap[mode] = true;
+  }
+  else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+    if ( channelInfo.type == ASIOSTInt24MSB ) stream_.doByteSwap[mode] = true;
+  }
+
+  if ( stream_.deviceFormat[mode] == 0 ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") data format not supported by RtAudio.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  // Set the buffer size.  For a duplex stream, this will end up
+  // setting the buffer size based on the input constraints, which
+  // should be ok.
+  long minSize, maxSize, preferSize, granularity;
+  result = ASIOGetBufferSize( &minSize, &maxSize, &preferSize, &granularity );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting buffer size.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  if ( isDuplexInput ) {
+    // When this is the duplex input (output was opened before), then we have to use the same
+    // buffersize as the output, because it might use the preferred buffer size, which most
+    // likely wasn't passed as input to this. The buffer sizes have to be identically anyway,
+    // So instead of throwing an error, make them equal. The caller uses the reference
+    // to the "bufferSize" param as usual to set up processing buffers.
+
+    *bufferSize = stream_.bufferSize;
+
+  } else {
+    if ( *bufferSize == 0 ) *bufferSize = preferSize;
+    else if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
+    else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
+    else if ( granularity == -1 ) {
+      // Make sure bufferSize is a power of two.
+      int log2_of_min_size = 0;
+      int log2_of_max_size = 0;
+
+      for ( unsigned int i = 0; i < sizeof(long) * 8; i++ ) {
+        if ( minSize & ((long)1 << i) ) log2_of_min_size = i;
+        if ( maxSize & ((long)1 << i) ) log2_of_max_size = i;
+      }
+
+      long min_delta = std::abs( (long)*bufferSize - ((long)1 << log2_of_min_size) );
+      int min_delta_num = log2_of_min_size;
+
+      for (int i = log2_of_min_size + 1; i <= log2_of_max_size; i++) {
+        long current_delta = std::abs( (long)*bufferSize - ((long)1 << i) );
+        if (current_delta < min_delta) {
+          min_delta = current_delta;
+          min_delta_num = i;
+        }
+      }
+
+      *bufferSize = ( (unsigned int)1 << min_delta_num );
+      if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
+      else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
+    }
+    else if ( granularity != 0 ) {
+      // Set to an even multiple of granularity, rounding up.
+      *bufferSize = (*bufferSize + granularity-1) / granularity * granularity;
+    }
+  }
+
+  /*
+  // we don't use it anymore, see above!
+  // Just left it here for the case...
+  if ( isDuplexInput && stream_.bufferSize != *bufferSize ) {
+    errorText_ = "RtApiAsio::probeDeviceOpen: input/output buffersize discrepancy!";
+    goto error;
+  }
+  */
+
+  stream_.bufferSize = *bufferSize;
+  stream_.nBuffers = 2;
+
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+
+  // ASIO always uses non-interleaved buffers.
+  stream_.deviceInterleaved[mode] = false;
+
+  // Allocate, if necessary, our AsioHandle structure for the stream.
+  if ( handle == 0 ) {
+    try {
+      handle = new AsioHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiAsio::probeDeviceOpen: error allocating AsioHandle memory.";
+      goto error;
+    }
+    handle->bufferInfos = 0;
+
+    // Create a manual-reset event.
+    handle->condition = CreateEvent( NULL,   // no security
+                                     TRUE,   // manual-reset
+                                     FALSE,  // non-signaled initially
+                                     NULL ); // unnamed
+    stream_.apiHandle = (void *) handle;
+  }
+
+  // Create the ASIO internal buffers.  Since RtAudio sets up input
+  // and output separately, we'll have to dispose of previously
+  // created output buffers for a duplex stream.
+  if ( mode == INPUT && stream_.mode == OUTPUT ) {
+    ASIODisposeBuffers();
+    if ( handle->bufferInfos ) free( handle->bufferInfos );
+  }
+
+  // Allocate, initialize, and save the bufferInfos in our stream callbackInfo structure.
+  unsigned int i;
+  nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
+  handle->bufferInfos = (ASIOBufferInfo *) malloc( nChannels * sizeof(ASIOBufferInfo) );
+  if ( handle->bufferInfos == NULL ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: error allocating bufferInfo memory for driver (" << driverName << ").";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+
+  ASIOBufferInfo *infos;
+  infos = handle->bufferInfos;
+  for ( i=0; i<stream_.nDeviceChannels[0]; i++, infos++ ) {
+    infos->isInput = ASIOFalse;
+    infos->channelNum = i + stream_.channelOffset[0];
+    infos->buffers[0] = infos->buffers[1] = 0;
+  }
+  for ( i=0; i<stream_.nDeviceChannels[1]; i++, infos++ ) {
+    infos->isInput = ASIOTrue;
+    infos->channelNum = i + stream_.channelOffset[1];
+    infos->buffers[0] = infos->buffers[1] = 0;
+  }
+
+  // prepare for callbacks
+  stream_.sampleRate = sampleRate;
+  stream_.device[mode] = device;
+  stream_.mode = isDuplexInput ? DUPLEX : mode;
+
+  // store this class instance before registering callbacks, that are going to use it
+  asioCallbackInfo = &stream_.callbackInfo;
+  stream_.callbackInfo.object = (void *) this;
+
+  // Set up the ASIO callback structure and create the ASIO data buffers.
+  asioCallbacks.bufferSwitch = &bufferSwitch;
+  asioCallbacks.sampleRateDidChange = &sampleRateChanged;
+  asioCallbacks.asioMessage = &asioMessages;
+  asioCallbacks.bufferSwitchTimeInfo = NULL;
+  result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
+  if ( result != ASE_OK ) {
+    // Standard method failed. This can happen with strict/misbehaving drivers that return valid buffer size ranges
+    // but only accept the preferred buffer size as parameter for ASIOCreateBuffers. eg. Creatives ASIO driver
+    // in that case, let's be naïve and try that instead
+    *bufferSize = preferSize;
+    stream_.bufferSize = *bufferSize;
+    result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
+  }
+
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") creating buffers.";
+    errorText_ = errorStream_.str();
+    goto error;
+  }
+  buffersAllocated = true;  
+  stream_.state = STREAM_STOPPED;
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+       stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate necessary internal buffers
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiAsio::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( isDuplexInput && stream_.deviceBuffer ) {
+      unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+      if ( bufferBytes <= bytesOut ) makeBuffer = false;
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiAsio::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  // Determine device latencies
+  long inputLatency, outputLatency;
+  result = ASIOGetLatencies( &inputLatency, &outputLatency );
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting latency.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING); // warn but don't fail
+  }
+  else {
+    stream_.latency[0] = outputLatency;
+    stream_.latency[1] = inputLatency;
+  }
+
+  // Setup the buffer conversion information structure.  We don't use
+  // buffers to do channel offsets, so we override that parameter
+  // here.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
+
+  return SUCCESS;
+
+ error:
+  if ( !isDuplexInput ) {
+    // the cleanup for error in the duplex input, is done by RtApi::openStream
+    // So we clean up for single channel only
+
+    if ( buffersAllocated )
+      ASIODisposeBuffers();
+
+    drivers.removeCurrentDriver();
+
+    if ( handle ) {
+      CloseHandle( handle->condition );
+      if ( handle->bufferInfos )
+        free( handle->bufferInfos );
+
+      delete handle;
+      stream_.apiHandle = 0;
+    }
+
+
+    if ( stream_.userBuffer[mode] ) {
+      free( stream_.userBuffer[mode] );
+      stream_.userBuffer[mode] = 0;
+    }
+
+    if ( stream_.deviceBuffer ) {
+      free( stream_.deviceBuffer );
+      stream_.deviceBuffer = 0;
+    }
+  }
+
+  return FAILURE;
+}////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void RtApiAsio :: closeStream()
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiAsio::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  if ( stream_.state == STREAM_RUNNING ) {
+    stream_.state = STREAM_STOPPED;
+    ASIOStop();
+  }
+  ASIODisposeBuffers();
+  drivers.removeCurrentDriver();
+
+  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+  if ( handle ) {
+    CloseHandle( handle->condition );
+    if ( handle->bufferInfos )
+      free( handle->bufferInfos );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+bool stopThreadCalled = false;
+
+void RtApiAsio :: startStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiAsio::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+  ASIOError result = ASIOStart();
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::startStream: error (" << getAsioErrorString( result ) << ") starting device.";
+    errorText_ = errorStream_.str();
+    goto unlock;
+  }
+
+  handle->drainCounter = 0;
+  handle->internalDrain = false;
+  ResetEvent( handle->condition );
+  stream_.state = STREAM_RUNNING;
+  asioXRun = false;
+
+ unlock:
+  stopThreadCalled = false;
+
+  if ( result == ASE_OK ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiAsio :: stopStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiAsio::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    if ( handle->drainCounter == 0 ) {
+      handle->drainCounter = 2;
+      WaitForSingleObject( handle->condition, INFINITE );  // block until signaled
+    }
+  }
+
+  stream_.state = STREAM_STOPPED;
+
+  ASIOError result = ASIOStop();
+  if ( result != ASE_OK ) {
+    errorStream_ << "RtApiAsio::stopStream: error (" << getAsioErrorString( result ) << ") stopping device.";
+    errorText_ = errorStream_.str();
+  }
+
+  if ( result == ASE_OK ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiAsio :: abortStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiAsio::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // The following lines were commented-out because some behavior was
+  // noted where the device buffers need to be zeroed to avoid
+  // continuing sound, even when the device buffers are completely
+  // disposed.  So now, calling abort is the same as calling stop.
+  // AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+  // handle->drainCounter = 2;
+  stopStream();
+}
+
+// This function will be called by a spawned thread when the user
+// callback function signals that the stream should be stopped or
+// aborted.  It is necessary to handle it this way because the
+// callbackEvent() function must return before the ASIOStop()
+// function will return.
+static unsigned __stdcall asioStopStream( void *ptr )
+{
+  CallbackInfo *info = (CallbackInfo *) ptr;
+  RtApiAsio *object = (RtApiAsio *) info->object;
+
+  object->stopStream();
+  _endthreadex( 0 );
+  return 0;
+}
+
+bool RtApiAsio :: callbackEvent( long bufferIndex )
+{
+  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiAsio::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return FAILURE;
+  }
+
+  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+  AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
+
+  // Check if we were draining the stream and signal if finished.
+  if ( handle->drainCounter > 3 ) {
+
+    stream_.state = STREAM_STOPPING;
+    if ( handle->internalDrain == false )
+      SetEvent( handle->condition );
+    else { // spawn a thread to stop the stream
+      unsigned threadId;
+      stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
+                                                    &stream_.callbackInfo, 0, &threadId );
+    }
+    return SUCCESS;
+  }
+
+  // Invoke user callback to get fresh output data UNLESS we are
+  // draining stream.
+  if ( handle->drainCounter == 0 ) {
+    RtAudioCallback callback = (RtAudioCallback) info->callback;
+    double streamTime = getStreamTime();
+    RtAudioStreamStatus status = 0;
+    if ( stream_.mode != INPUT && asioXRun == true ) {
+      status |= RTAUDIO_OUTPUT_UNDERFLOW;
+      asioXRun = false;
+    }
+    if ( stream_.mode != OUTPUT && asioXRun == true ) {
+      status |= RTAUDIO_INPUT_OVERFLOW;
+      asioXRun = false;
+    }
+    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                                     stream_.bufferSize, streamTime, status, info->userData );
+    if ( cbReturnValue == 2 ) {
+      stream_.state = STREAM_STOPPING;
+      handle->drainCounter = 2;
+      unsigned threadId;
+      stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
+                                                    &stream_.callbackInfo, 0, &threadId );
+      return SUCCESS;
+    }
+    else if ( cbReturnValue == 1 ) {
+      handle->drainCounter = 1;
+      handle->internalDrain = true;
+    }
+  }
+
+  unsigned int nChannels, bufferBytes, i, j;
+  nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    bufferBytes = stream_.bufferSize * formatBytes( stream_.deviceFormat[0] );
+
+    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput != ASIOTrue )
+          memset( handle->bufferInfos[i].buffers[bufferIndex], 0, bufferBytes );
+      }
+
+    }
+    else if ( stream_.doConvertBuffer[0] ) {
+
+      convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+      if ( stream_.doByteSwap[0] )
+        byteSwapBuffer( stream_.deviceBuffer,
+                        stream_.bufferSize * stream_.nDeviceChannels[0],
+                        stream_.deviceFormat[0] );
+
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput != ASIOTrue )
+          memcpy( handle->bufferInfos[i].buffers[bufferIndex],
+                  &stream_.deviceBuffer[j++*bufferBytes], bufferBytes );
+      }
+
+    }
+    else {
+
+      if ( stream_.doByteSwap[0] )
+        byteSwapBuffer( stream_.userBuffer[0],
+                        stream_.bufferSize * stream_.nUserChannels[0],
+                        stream_.userFormat );
+
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput != ASIOTrue )
+          memcpy( handle->bufferInfos[i].buffers[bufferIndex],
+                  &stream_.userBuffer[0][bufferBytes*j++], bufferBytes );
+      }
+
+    }
+  }
+
+  // Don't bother draining input
+  if ( handle->drainCounter ) {
+    handle->drainCounter++;
+    goto unlock;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    bufferBytes = stream_.bufferSize * formatBytes(stream_.deviceFormat[1]);
+
+    if (stream_.doConvertBuffer[1]) {
+
+      // Always interleave ASIO input data.
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput == ASIOTrue )
+          memcpy( &stream_.deviceBuffer[j++*bufferBytes],
+                  handle->bufferInfos[i].buffers[bufferIndex],
+                  bufferBytes );
+      }
+
+      if ( stream_.doByteSwap[1] )
+        byteSwapBuffer( stream_.deviceBuffer,
+                        stream_.bufferSize * stream_.nDeviceChannels[1],
+                        stream_.deviceFormat[1] );
+      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+
+    }
+    else {
+      for ( i=0, j=0; i<nChannels; i++ ) {
+        if ( handle->bufferInfos[i].isInput == ASIOTrue ) {
+          memcpy( &stream_.userBuffer[1][bufferBytes*j++],
+                  handle->bufferInfos[i].buffers[bufferIndex],
+                  bufferBytes );
+        }
+      }
+
+      if ( stream_.doByteSwap[1] )
+        byteSwapBuffer( stream_.userBuffer[1],
+                        stream_.bufferSize * stream_.nUserChannels[1],
+                        stream_.userFormat );
+    }
+  }
+
+ unlock:
+  // The following call was suggested by Malte Clasen.  While the API
+  // documentation indicates it should not be required, some device
+  // drivers apparently do not function correctly without it.
+  ASIOOutputReady();
+
+  RtApi::tickStreamTime();
+  return SUCCESS;
+}
+
+static void sampleRateChanged( ASIOSampleRate sRate )
+{
+  // The ASIO documentation says that this usually only happens during
+  // external sync.  Audio processing is not stopped by the driver,
+  // actual sample rate might not have even changed, maybe only the
+  // sample rate status of an AES/EBU or S/PDIF digital input at the
+  // audio device.
+
+  RtApi *object = (RtApi *) asioCallbackInfo->object;
+  try {
+    object->stopStream();
+  }
+  catch ( RtAudioError &exception ) {
+    std::cerr << "\nRtApiAsio: sampleRateChanged() error (" << exception.getMessage() << ")!\n" << std::endl;
+    return;
+  }
+
+  std::cerr << "\nRtApiAsio: driver reports sample rate changed to " << sRate << " ... stream stopped!!!\n" << std::endl;
+}
+
+static long asioMessages( long selector, long value, void* /*message*/, double* /*opt*/ )
+{
+  long ret = 0;
+
+  switch( selector ) {
+  case kAsioSelectorSupported:
+    if ( value == kAsioResetRequest
+         || value == kAsioEngineVersion
+         || value == kAsioResyncRequest
+         || value == kAsioLatenciesChanged
+         // The following three were added for ASIO 2.0, you don't
+         // necessarily have to support them.
+         || value == kAsioSupportsTimeInfo
+         || value == kAsioSupportsTimeCode
+         || value == kAsioSupportsInputMonitor)
+      ret = 1L;
+    break;
+  case kAsioResetRequest:
+    // Defer the task and perform the reset of the driver during the
+    // next "safe" situation.  You cannot reset the driver right now,
+    // as this code is called from the driver.  Reset the driver is
+    // done by completely destruct is. I.e. ASIOStop(),
+    // ASIODisposeBuffers(), Destruction Afterwards you initialize the
+    // driver again.
+    std::cerr << "\nRtApiAsio: driver reset requested!!!" << std::endl;
+    ret = 1L;
+    break;
+  case kAsioResyncRequest:
+    // This informs the application that the driver encountered some
+    // non-fatal data loss.  It is used for synchronization purposes
+    // of different media.  Added mainly to work around the Win16Mutex
+    // problems in Windows 95/98 with the Windows Multimedia system,
+    // which could lose data because the Mutex was held too long by
+    // another thread.  However a driver can issue it in other
+    // situations, too.
+    // std::cerr << "\nRtApiAsio: driver resync requested!!!" << std::endl;
+    asioXRun = true;
+    ret = 1L;
+    break;
+  case kAsioLatenciesChanged:
+    // This will inform the host application that the drivers were
+    // latencies changed.  Beware, it this does not mean that the
+    // buffer sizes have changed!  You might need to update internal
+    // delay data.
+    std::cerr << "\nRtApiAsio: driver latency may have changed!!!" << std::endl;
+    ret = 1L;
+    break;
+  case kAsioEngineVersion:
+    // Return the supported ASIO version of the host application.  If
+    // a host application does not implement this selector, ASIO 1.0
+    // is assumed by the driver.
+    ret = 2L;
+    break;
+  case kAsioSupportsTimeInfo:
+    // Informs the driver whether the
+    // asioCallbacks.bufferSwitchTimeInfo() callback is supported.
+    // For compatibility with ASIO 1.0 drivers the host application
+    // should always support the "old" bufferSwitch method, too.
+    ret = 0;
+    break;
+  case kAsioSupportsTimeCode:
+    // Informs the driver whether application is interested in time
+    // code info.  If an application does not need to know about time
+    // code, the driver has less work to do.
+    ret = 0;
+    break;
+  }
+  return ret;
+}
+
+static const char* getAsioErrorString( ASIOError result )
+{
+  struct Messages 
+  {
+    ASIOError value;
+    const char*message;
+  };
+
+  static const Messages m[] = 
+    {
+      {   ASE_NotPresent,    "Hardware input or output is not present or available." },
+      {   ASE_HWMalfunction,  "Hardware is malfunctioning." },
+      {   ASE_InvalidParameter, "Invalid input parameter." },
+      {   ASE_InvalidMode,      "Invalid mode." },
+      {   ASE_SPNotAdvancing,     "Sample position not advancing." },
+      {   ASE_NoClock,            "Sample clock or rate cannot be determined or is not present." },
+      {   ASE_NoMemory,           "Not enough memory to complete the request." }
+    };
+
+  for ( unsigned int i = 0; i < sizeof(m)/sizeof(m[0]); ++i )
+    if ( m[i].value == result ) return m[i].message;
+
+  return "Unknown error.";
+}
+
+//******************** End of __WINDOWS_ASIO__ *********************//
+#endif
+
+
+#if defined(__WINDOWS_WASAPI__) // Windows WASAPI API
+
+// Authored by Marcus Tomlinson <themarcustomlinson@gmail.com>, April 2014
+// - Introduces support for the Windows WASAPI API
+// - Aims to deliver bit streams to and from hardware at the lowest possible latency, via the absolute minimum buffer sizes required
+// - Provides flexible stream configuration to an otherwise strict and inflexible WASAPI interface
+// - Includes automatic internal conversion of sample rate and buffer size between hardware and the user
+
+#ifndef INITGUID
+  #define INITGUID
+#endif
+#include <audioclient.h>
+#include <avrt.h>
+#include <mmdeviceapi.h>
+#include <functiondiscoverykeys_devpkey.h>
+
+//=============================================================================
+
+#define SAFE_RELEASE( objectPtr )\
+if ( objectPtr )\
+{\
+  objectPtr->Release();\
+  objectPtr = NULL;\
+}
+
+typedef HANDLE ( __stdcall *TAvSetMmThreadCharacteristicsPtr )( LPCWSTR TaskName, LPDWORD TaskIndex );
+
+//-----------------------------------------------------------------------------
+
+// WASAPI dictates stream sample rate, format, channel count, and in some cases, buffer size.
+// Therefore we must perform all necessary conversions to user buffers in order to satisfy these
+// requirements. WasapiBuffer ring buffers are used between HwIn->UserIn and UserOut->HwOut to
+// provide intermediate storage for read / write synchronization.
+class WasapiBuffer
+{
+public:
+  WasapiBuffer()
+    : buffer_( NULL ),
+      bufferSize_( 0 ),
+      inIndex_( 0 ),
+      outIndex_( 0 ) {}
+
+  ~WasapiBuffer() {
+    free( buffer_ );
+  }
+
+  // sets the length of the internal ring buffer
+  void setBufferSize( unsigned int bufferSize, unsigned int formatBytes ) {
+    free( buffer_ );
+
+    buffer_ = ( char* ) calloc( bufferSize, formatBytes );
+
+    bufferSize_ = bufferSize;
+    inIndex_ = 0;
+    outIndex_ = 0;
+  }
+
+  // attempt to push a buffer into the ring buffer at the current "in" index
+  bool pushBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
+  {
+    if ( !buffer ||                 // incoming buffer is NULL
+         bufferSize == 0 ||         // incoming buffer has no data
+         bufferSize > bufferSize_ ) // incoming buffer too large
+    {
+      return false;
+    }
+
+    unsigned int relOutIndex = outIndex_;
+    unsigned int inIndexEnd = inIndex_ + bufferSize;
+    if ( relOutIndex < inIndex_ && inIndexEnd >= bufferSize_ ) {
+      relOutIndex += bufferSize_;
+    }
+
+    // "in" index can end on the "out" index but cannot begin at it
+    if ( inIndex_ <= relOutIndex && inIndexEnd > relOutIndex ) {
+      return false; // not enough space between "in" index and "out" index
+    }
+
+    // copy buffer from external to internal
+    int fromZeroSize = inIndex_ + bufferSize - bufferSize_;
+    fromZeroSize = fromZeroSize < 0 ? 0 : fromZeroSize;
+    int fromInSize = bufferSize - fromZeroSize;
+
+    switch( format )
+      {
+      case RTAUDIO_SINT8:
+        memcpy( &( ( char* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( char ) );
+        memcpy( buffer_, &( ( char* ) buffer )[fromInSize], fromZeroSize * sizeof( char ) );
+        break;
+      case RTAUDIO_SINT16:
+        memcpy( &( ( short* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( short ) );
+        memcpy( buffer_, &( ( short* ) buffer )[fromInSize], fromZeroSize * sizeof( short ) );
+        break;
+      case RTAUDIO_SINT24:
+        memcpy( &( ( S24* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( S24 ) );
+        memcpy( buffer_, &( ( S24* ) buffer )[fromInSize], fromZeroSize * sizeof( S24 ) );
+        break;
+      case RTAUDIO_SINT32:
+        memcpy( &( ( int* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( int ) );
+        memcpy( buffer_, &( ( int* ) buffer )[fromInSize], fromZeroSize * sizeof( int ) );
+        break;
+      case RTAUDIO_FLOAT32:
+        memcpy( &( ( float* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( float ) );
+        memcpy( buffer_, &( ( float* ) buffer )[fromInSize], fromZeroSize * sizeof( float ) );
+        break;
+      case RTAUDIO_FLOAT64:
+        memcpy( &( ( double* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( double ) );
+        memcpy( buffer_, &( ( double* ) buffer )[fromInSize], fromZeroSize * sizeof( double ) );
+        break;
+    }
+
+    // update "in" index
+    inIndex_ += bufferSize;
+    inIndex_ %= bufferSize_;
+
+    return true;
+  }
+
+  // attempt to pull a buffer from the ring buffer from the current "out" index
+  bool pullBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
+  {
+    if ( !buffer ||                 // incoming buffer is NULL
+         bufferSize == 0 ||         // incoming buffer has no data
+         bufferSize > bufferSize_ ) // incoming buffer too large
+    {
+      return false;
+    }
+
+    unsigned int relInIndex = inIndex_;
+    unsigned int outIndexEnd = outIndex_ + bufferSize;
+    if ( relInIndex < outIndex_ && outIndexEnd >= bufferSize_ ) {
+      relInIndex += bufferSize_;
+    }
+
+    // "out" index can begin at and end on the "in" index
+    if ( outIndex_ < relInIndex && outIndexEnd > relInIndex ) {
+      return false; // not enough space between "out" index and "in" index
+    }
+
+    // copy buffer from internal to external
+    int fromZeroSize = outIndex_ + bufferSize - bufferSize_;
+    fromZeroSize = fromZeroSize < 0 ? 0 : fromZeroSize;
+    int fromOutSize = bufferSize - fromZeroSize;
+
+    switch( format )
+    {
+      case RTAUDIO_SINT8:
+        memcpy( buffer, &( ( char* ) buffer_ )[outIndex_], fromOutSize * sizeof( char ) );
+        memcpy( &( ( char* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( char ) );
+        break;
+      case RTAUDIO_SINT16:
+        memcpy( buffer, &( ( short* ) buffer_ )[outIndex_], fromOutSize * sizeof( short ) );
+        memcpy( &( ( short* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( short ) );
+        break;
+      case RTAUDIO_SINT24:
+        memcpy( buffer, &( ( S24* ) buffer_ )[outIndex_], fromOutSize * sizeof( S24 ) );
+        memcpy( &( ( S24* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( S24 ) );
+        break;
+      case RTAUDIO_SINT32:
+        memcpy( buffer, &( ( int* ) buffer_ )[outIndex_], fromOutSize * sizeof( int ) );
+        memcpy( &( ( int* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( int ) );
+        break;
+      case RTAUDIO_FLOAT32:
+        memcpy( buffer, &( ( float* ) buffer_ )[outIndex_], fromOutSize * sizeof( float ) );
+        memcpy( &( ( float* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( float ) );
+        break;
+      case RTAUDIO_FLOAT64:
+        memcpy( buffer, &( ( double* ) buffer_ )[outIndex_], fromOutSize * sizeof( double ) );
+        memcpy( &( ( double* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( double ) );
+        break;
+    }
+
+    // update "out" index
+    outIndex_ += bufferSize;
+    outIndex_ %= bufferSize_;
+
+    return true;
+  }
+
+private:
+  char* buffer_;
+  unsigned int bufferSize_;
+  unsigned int inIndex_;
+  unsigned int outIndex_;
+};
+
+//-----------------------------------------------------------------------------
+
+// In order to satisfy WASAPI's buffer requirements, we need a means of converting sample rate
+// between HW and the user. The convertBufferWasapi function is used to perform this conversion
+// between HwIn->UserIn and UserOut->HwOut during the stream callback loop.
+// This sample rate converter favors speed over quality, and works best with conversions between
+// one rate and its multiple.
+void convertBufferWasapi( char* outBuffer,
+                          const char* inBuffer,
+                          const unsigned int& channelCount,
+                          const unsigned int& inSampleRate,
+                          const unsigned int& outSampleRate,
+                          const unsigned int& inSampleCount,
+                          unsigned int& outSampleCount,
+                          const RtAudioFormat& format )
+{
+  // calculate the new outSampleCount and relative sampleStep
+  float sampleRatio = ( float ) outSampleRate / inSampleRate;
+  float sampleStep = 1.0f / sampleRatio;
+  float inSampleFraction = 0.0f;
+
+  outSampleCount = ( unsigned int ) roundf( inSampleCount * sampleRatio );
+
+  // frame-by-frame, copy each relative input sample into it's corresponding output sample
+  for ( unsigned int outSample = 0; outSample < outSampleCount; outSample++ )
+  {
+    unsigned int inSample = ( unsigned int ) inSampleFraction;
+
+    switch ( format )
+    {
+      case RTAUDIO_SINT8:
+        memcpy( &( ( char* ) outBuffer )[ outSample * channelCount ], &( ( char* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( char ) );
+        break;
+      case RTAUDIO_SINT16:
+        memcpy( &( ( short* ) outBuffer )[ outSample * channelCount ], &( ( short* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( short ) );
+        break;
+      case RTAUDIO_SINT24:
+        memcpy( &( ( S24* ) outBuffer )[ outSample * channelCount ], &( ( S24* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( S24 ) );
+        break;
+      case RTAUDIO_SINT32:
+        memcpy( &( ( int* ) outBuffer )[ outSample * channelCount ], &( ( int* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( int ) );
+        break;
+      case RTAUDIO_FLOAT32:
+        memcpy( &( ( float* ) outBuffer )[ outSample * channelCount ], &( ( float* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( float ) );
+        break;
+      case RTAUDIO_FLOAT64:
+        memcpy( &( ( double* ) outBuffer )[ outSample * channelCount ], &( ( double* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( double ) );
+        break;
+    }
+
+    // jump to next in sample
+    inSampleFraction += sampleStep;
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+// A structure to hold various information related to the WASAPI implementation.
+struct WasapiHandle
+{
+  IAudioClient* captureAudioClient;
+  IAudioClient* renderAudioClient;
+  IAudioCaptureClient* captureClient;
+  IAudioRenderClient* renderClient;
+  HANDLE captureEvent;
+  HANDLE renderEvent;
+
+  WasapiHandle()
+  : captureAudioClient( NULL ),
+    renderAudioClient( NULL ),
+    captureClient( NULL ),
+    renderClient( NULL ),
+    captureEvent( NULL ),
+    renderEvent( NULL ) {}
+};
+
+//=============================================================================
+
+RtApiWasapi::RtApiWasapi()
+  : coInitialized_( false ), deviceEnumerator_( NULL )
+{
+  // WASAPI can run either apartment or multi-threaded
+  HRESULT hr = CoInitialize( NULL );
+  if ( !FAILED( hr ) )
+    coInitialized_ = true;
+
+  // Instantiate device enumerator
+  hr = CoCreateInstance( __uuidof( MMDeviceEnumerator ), NULL,
+                         CLSCTX_ALL, __uuidof( IMMDeviceEnumerator ),
+                         ( void** ) &deviceEnumerator_ );
+
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::RtApiWasapi: Unable to instantiate device enumerator";
+    error( RtAudioError::DRIVER_ERROR );
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+RtApiWasapi::~RtApiWasapi()
+{
+  if ( stream_.state != STREAM_CLOSED )
+    closeStream();
+
+  SAFE_RELEASE( deviceEnumerator_ );
+
+  // If this object previously called CoInitialize()
+  if ( coInitialized_ )
+    CoUninitialize();
+}
+
+//=============================================================================
+
+unsigned int RtApiWasapi::getDeviceCount( void )
+{
+  unsigned int captureDeviceCount = 0;
+  unsigned int renderDeviceCount = 0;
+
+  IMMDeviceCollection* captureDevices = NULL;
+  IMMDeviceCollection* renderDevices = NULL;
+
+  // Count capture devices
+  errorText_.clear();
+  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device collection.";
+    goto Exit;
+  }
+
+  hr = captureDevices->GetCount( &captureDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device count.";
+    goto Exit;
+  }
+
+  // Count render devices
+  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device collection.";
+    goto Exit;
+  }
+
+  hr = renderDevices->GetCount( &renderDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device count.";
+    goto Exit;
+  }
+
+Exit:
+  // release all references
+  SAFE_RELEASE( captureDevices );
+  SAFE_RELEASE( renderDevices );
+
+  if ( errorText_.empty() )
+    return captureDeviceCount + renderDeviceCount;
+
+  error( RtAudioError::DRIVER_ERROR );
+  return 0;
+}
+
+//-----------------------------------------------------------------------------
+
+RtAudio::DeviceInfo RtApiWasapi::getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  unsigned int captureDeviceCount = 0;
+  unsigned int renderDeviceCount = 0;
+  std::string defaultDeviceName;
+  bool isCaptureDevice = false;
+
+  PROPVARIANT deviceNameProp;
+  PROPVARIANT defaultDeviceNameProp;
+
+  IMMDeviceCollection* captureDevices = NULL;
+  IMMDeviceCollection* renderDevices = NULL;
+  IMMDevice* devicePtr = NULL;
+  IMMDevice* defaultDevicePtr = NULL;
+  IAudioClient* audioClient = NULL;
+  IPropertyStore* devicePropStore = NULL;
+  IPropertyStore* defaultDevicePropStore = NULL;
+
+  WAVEFORMATEX* deviceFormat = NULL;
+  WAVEFORMATEX* closestMatchFormat = NULL;
+
+  // probed
+  info.probed = false;
+
+  // Count capture devices
+  errorText_.clear();
+  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
+  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device collection.";
+    goto Exit;
+  }
+
+  hr = captureDevices->GetCount( &captureDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device count.";
+    goto Exit;
+  }
+
+  // Count render devices
+  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device collection.";
+    goto Exit;
+  }
+
+  hr = renderDevices->GetCount( &renderDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device count.";
+    goto Exit;
+  }
+
+  // validate device index
+  if ( device >= captureDeviceCount + renderDeviceCount ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Invalid device index.";
+    errorType = RtAudioError::INVALID_USE;
+    goto Exit;
+  }
+
+  // determine whether index falls within capture or render devices
+  if ( device >= renderDeviceCount ) {
+    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device handle.";
+      goto Exit;
+    }
+    isCaptureDevice = true;
+  }
+  else {
+    hr = renderDevices->Item( device, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device handle.";
+      goto Exit;
+    }
+    isCaptureDevice = false;
+  }
+
+  // get default device name
+  if ( isCaptureDevice ) {
+    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eCapture, eConsole, &defaultDevicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default capture device handle.";
+      goto Exit;
+    }
+  }
+  else {
+    hr = deviceEnumerator_->GetDefaultAudioEndpoint( eRender, eConsole, &defaultDevicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default render device handle.";
+      goto Exit;
+    }
+  }
+
+  hr = defaultDevicePtr->OpenPropertyStore( STGM_READ, &defaultDevicePropStore );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open default device property store.";
+    goto Exit;
+  }
+  PropVariantInit( &defaultDeviceNameProp );
+
+  hr = defaultDevicePropStore->GetValue( PKEY_Device_FriendlyName, &defaultDeviceNameProp );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default device property: PKEY_Device_FriendlyName.";
+    goto Exit;
+  }
+
+  defaultDeviceName = convertCharPointerToStdString(defaultDeviceNameProp.pwszVal);
+
+  // name
+  hr = devicePtr->OpenPropertyStore( STGM_READ, &devicePropStore );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open device property store.";
+    goto Exit;
+  }
+
+  PropVariantInit( &deviceNameProp );
+
+  hr = devicePropStore->GetValue( PKEY_Device_FriendlyName, &deviceNameProp );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device property: PKEY_Device_FriendlyName.";
+    goto Exit;
+  }
+
+  info.name =convertCharPointerToStdString(deviceNameProp.pwszVal);
+
+  // is default
+  if ( isCaptureDevice ) {
+    info.isDefaultInput = info.name == defaultDeviceName;
+    info.isDefaultOutput = false;
+  }
+  else {
+    info.isDefaultInput = false;
+    info.isDefaultOutput = info.name == defaultDeviceName;
+  }
+
+  // channel count
+  hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, NULL, ( void** ) &audioClient );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device audio client.";
+    goto Exit;
+  }
+
+  hr = audioClient->GetMixFormat( &deviceFormat );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device mix format.";
+    goto Exit;
+  }
+
+  if ( isCaptureDevice ) {
+    info.inputChannels = deviceFormat->nChannels;
+    info.outputChannels = 0;
+    info.duplexChannels = 0;
+  }
+  else {
+    info.inputChannels = 0;
+    info.outputChannels = deviceFormat->nChannels;
+    info.duplexChannels = 0;
+  }
+
+  // sample rates
+  info.sampleRates.clear();
+
+  // allow support for all sample rates as we have a built-in sample rate converter
+  for ( unsigned int i = 0; i < MAX_SAMPLE_RATES; i++ ) {
+    info.sampleRates.push_back( SAMPLE_RATES[i] );
+  }
+  info.preferredSampleRate = deviceFormat->nSamplesPerSec;
+
+  // native format
+  info.nativeFormats = 0;
+
+  if ( deviceFormat->wFormatTag == WAVE_FORMAT_IEEE_FLOAT ||
+       ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
+         ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ) )
+  {
+    if ( deviceFormat->wBitsPerSample == 32 ) {
+      info.nativeFormats |= RTAUDIO_FLOAT32;
+    }
+    else if ( deviceFormat->wBitsPerSample == 64 ) {
+      info.nativeFormats |= RTAUDIO_FLOAT64;
+    }
+  }
+  else if ( deviceFormat->wFormatTag == WAVE_FORMAT_PCM ||
+           ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
+             ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_PCM ) )
+  {
+    if ( deviceFormat->wBitsPerSample == 8 ) {
+      info.nativeFormats |= RTAUDIO_SINT8;
+    }
+    else if ( deviceFormat->wBitsPerSample == 16 ) {
+      info.nativeFormats |= RTAUDIO_SINT16;
+    }
+    else if ( deviceFormat->wBitsPerSample == 24 ) {
+      info.nativeFormats |= RTAUDIO_SINT24;
+    }
+    else if ( deviceFormat->wBitsPerSample == 32 ) {
+      info.nativeFormats |= RTAUDIO_SINT32;
+    }
+  }
+
+  // probed
+  info.probed = true;
+
+Exit:
+  // release all references
+  PropVariantClear( &deviceNameProp );
+  PropVariantClear( &defaultDeviceNameProp );
+
+  SAFE_RELEASE( captureDevices );
+  SAFE_RELEASE( renderDevices );
+  SAFE_RELEASE( devicePtr );
+  SAFE_RELEASE( defaultDevicePtr );
+  SAFE_RELEASE( audioClient );
+  SAFE_RELEASE( devicePropStore );
+  SAFE_RELEASE( defaultDevicePropStore );
+
+  CoTaskMemFree( deviceFormat );
+  CoTaskMemFree( closestMatchFormat );
+
+  if ( !errorText_.empty() )
+    error( errorType );
+  return info;
+}
+
+//-----------------------------------------------------------------------------
+
+unsigned int RtApiWasapi::getDefaultOutputDevice( void )
+{
+  for ( unsigned int i = 0; i < getDeviceCount(); i++ ) {
+    if ( getDeviceInfo( i ).isDefaultOutput ) {
+      return i;
+    }
+  }
+
+  return 0;
+}
+
+//-----------------------------------------------------------------------------
+
+unsigned int RtApiWasapi::getDefaultInputDevice( void )
+{
+  for ( unsigned int i = 0; i < getDeviceCount(); i++ ) {
+    if ( getDeviceInfo( i ).isDefaultInput ) {
+      return i;
+    }
+  }
+
+  return 0;
+}
+
+//-----------------------------------------------------------------------------
+
+void RtApiWasapi::closeStream( void )
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiWasapi::closeStream: No open stream to close.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  if ( stream_.state != STREAM_STOPPED )
+    stopStream();
+
+  // clean up stream memory
+  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient )
+  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient )
+
+  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureClient )
+  SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderClient )
+
+  if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent )
+    CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent );
+
+  if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent )
+    CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent );
+
+  delete ( WasapiHandle* ) stream_.apiHandle;
+  stream_.apiHandle = NULL;
+
+  for ( int i = 0; i < 2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  // update stream state
+  stream_.state = STREAM_CLOSED;
+}
+
+//-----------------------------------------------------------------------------
+
+void RtApiWasapi::startStream( void )
+{
+  verifyStream();
+
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiWasapi::startStream: The stream is already running.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // update stream state
+  stream_.state = STREAM_RUNNING;
+
+  // create WASAPI stream thread
+  stream_.callbackInfo.thread = ( ThreadHandle ) CreateThread( NULL, 0, runWasapiThread, this, CREATE_SUSPENDED, NULL );
+
+  if ( !stream_.callbackInfo.thread ) {
+    errorText_ = "RtApiWasapi::startStream: Unable to instantiate callback thread.";
+    error( RtAudioError::THREAD_ERROR );
+  }
+  else {
+    SetThreadPriority( ( void* ) stream_.callbackInfo.thread, stream_.callbackInfo.priority );
+    ResumeThread( ( void* ) stream_.callbackInfo.thread );
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void RtApiWasapi::stopStream( void )
+{
+  verifyStream();
+
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiWasapi::stopStream: The stream is already stopped.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // inform stream thread by setting stream state to STREAM_STOPPING
+  stream_.state = STREAM_STOPPING;
+
+  // wait until stream thread is stopped
+  while( stream_.state != STREAM_STOPPED ) {
+    Sleep( 1 );
+  }
+
+  // Wait for the last buffer to play before stopping.
+  Sleep( 1000 * stream_.bufferSize / stream_.sampleRate );
+
+  // stop capture client if applicable
+  if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) {
+    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::stopStream: Unable to stop capture stream.";
+      error( RtAudioError::DRIVER_ERROR );
+      return;
+    }
+  }
+
+  // stop render client if applicable
+  if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) {
+    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::stopStream: Unable to stop render stream.";
+      error( RtAudioError::DRIVER_ERROR );
+      return;
+    }
+  }
+
+  // close thread handle
+  if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
+    errorText_ = "RtApiWasapi::stopStream: Unable to close callback thread.";
+    error( RtAudioError::THREAD_ERROR );
+    return;
+  }
+
+  stream_.callbackInfo.thread = (ThreadHandle) NULL;
+}
+
+//-----------------------------------------------------------------------------
+
+void RtApiWasapi::abortStream( void )
+{
+  verifyStream();
+
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiWasapi::abortStream: The stream is already stopped.";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // inform stream thread by setting stream state to STREAM_STOPPING
+  stream_.state = STREAM_STOPPING;
+
+  // wait until stream thread is stopped
+  while ( stream_.state != STREAM_STOPPED ) {
+    Sleep( 1 );
+  }
+
+  // stop capture client if applicable
+  if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) {
+    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::abortStream: Unable to stop capture stream.";
+      error( RtAudioError::DRIVER_ERROR );
+      return;
+    }
+  }
+
+  // stop render client if applicable
+  if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) {
+    HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::abortStream: Unable to stop render stream.";
+      error( RtAudioError::DRIVER_ERROR );
+      return;
+    }
+  }
+
+  // close thread handle
+  if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
+    errorText_ = "RtApiWasapi::abortStream: Unable to close callback thread.";
+    error( RtAudioError::THREAD_ERROR );
+    return;
+  }
+
+  stream_.callbackInfo.thread = (ThreadHandle) NULL;
+}
+
+//-----------------------------------------------------------------------------
+
+bool RtApiWasapi::probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int* bufferSize,
+                                   RtAudio::StreamOptions* options )
+{
+  bool methodResult = FAILURE;
+  unsigned int captureDeviceCount = 0;
+  unsigned int renderDeviceCount = 0;
+
+  IMMDeviceCollection* captureDevices = NULL;
+  IMMDeviceCollection* renderDevices = NULL;
+  IMMDevice* devicePtr = NULL;
+  WAVEFORMATEX* deviceFormat = NULL;
+  unsigned int bufferBytes;
+  stream_.state = STREAM_STOPPED;
+
+  // create API Handle if not already created
+  if ( !stream_.apiHandle )
+    stream_.apiHandle = ( void* ) new WasapiHandle();
+
+  // Count capture devices
+  errorText_.clear();
+  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
+  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device collection.";
+    goto Exit;
+  }
+
+  hr = captureDevices->GetCount( &captureDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device count.";
+    goto Exit;
+  }
+
+  // Count render devices
+  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device collection.";
+    goto Exit;
+  }
+
+  hr = renderDevices->GetCount( &renderDeviceCount );
+  if ( FAILED( hr ) ) {
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device count.";
+    goto Exit;
+  }
+
+  // validate device index
+  if ( device >= captureDeviceCount + renderDeviceCount ) {
+    errorType = RtAudioError::INVALID_USE;
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Invalid device index.";
+    goto Exit;
+  }
+
+  // determine whether index falls within capture or render devices
+  if ( device >= renderDeviceCount ) {
+    if ( mode != INPUT ) {
+      errorType = RtAudioError::INVALID_USE;
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Capture device selected as output device.";
+      goto Exit;
+    }
+
+    // retrieve captureAudioClient from devicePtr
+    IAudioClient*& captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
+
+    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device handle.";
+      goto Exit;
+    }
+
+    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
+                              NULL, ( void** ) &captureAudioClient );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
+      goto Exit;
+    }
+
+    hr = captureAudioClient->GetMixFormat( &deviceFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
+    captureAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] );
+  }
+  else {
+    if ( mode != OUTPUT ) {
+      errorType = RtAudioError::INVALID_USE;
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Render device selected as input device.";
+      goto Exit;
+    }
+
+    // retrieve renderAudioClient from devicePtr
+    IAudioClient*& renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
+
+    hr = renderDevices->Item( device, &devicePtr );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device handle.";
+      goto Exit;
+    }
+
+    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
+                              NULL, ( void** ) &renderAudioClient );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
+      goto Exit;
+    }
+
+    hr = renderAudioClient->GetMixFormat( &deviceFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
+    renderAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] );
+  }
+
+  // fill stream data
+  if ( ( stream_.mode == OUTPUT && mode == INPUT ) ||
+       ( stream_.mode == INPUT && mode == OUTPUT ) ) {
+    stream_.mode = DUPLEX;
+  }
+  else {
+    stream_.mode = mode;
+  }
+
+  stream_.device[mode] = device;
+  stream_.doByteSwap[mode] = false;
+  stream_.sampleRate = sampleRate;
+  stream_.bufferSize = *bufferSize;
+  stream_.nBuffers = 1;
+  stream_.nUserChannels[mode] = channels;
+  stream_.channelOffset[mode] = firstChannel;
+  stream_.userFormat = format;
+  stream_.deviceFormat[mode] = getDeviceInfo( device ).nativeFormats;
+
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
+    stream_.userInterleaved = false;
+  else
+    stream_.userInterleaved = true;
+  stream_.deviceInterleaved[mode] = true;
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] ||
+       stream_.nUserChannels != stream_.nDeviceChannels )
+    stream_.doConvertBuffer[mode] = true;
+  else if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+            stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  if ( stream_.doConvertBuffer[mode] )
+    setConvertInfo( mode, 0 );
+
+  // Allocate necessary internal buffers
+  bufferBytes = stream_.nUserChannels[mode] * stream_.bufferSize * formatBytes( stream_.userFormat );
+
+  stream_.userBuffer[mode] = ( char* ) calloc( bufferBytes, 1 );
+  if ( !stream_.userBuffer[mode] ) {
+    errorType = RtAudioError::MEMORY_ERROR;
+    errorText_ = "RtApiWasapi::probeDeviceOpen: Error allocating user buffer memory.";
+    goto Exit;
+  }
+
+  if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME )
+    stream_.callbackInfo.priority = 15;
+  else
+    stream_.callbackInfo.priority = 0;
+
+  ///! TODO: RTAUDIO_MINIMIZE_LATENCY // Provide stream buffers directly to callback
+  ///! TODO: RTAUDIO_HOG_DEVICE       // Exclusive mode
+
+  methodResult = SUCCESS;
+
+Exit:
+  //clean up
+  SAFE_RELEASE( captureDevices );
+  SAFE_RELEASE( renderDevices );
+  SAFE_RELEASE( devicePtr );
+  CoTaskMemFree( deviceFormat );
+
+  // if method failed, close the stream
+  if ( methodResult == FAILURE )
+    closeStream();
+
+  if ( !errorText_.empty() )
+    error( errorType );
+  return methodResult;
+}
+
+//=============================================================================
+
+DWORD WINAPI RtApiWasapi::runWasapiThread( void* wasapiPtr )
+{
+  if ( wasapiPtr )
+    ( ( RtApiWasapi* ) wasapiPtr )->wasapiThread();
+
+  return 0;
+}
+
+DWORD WINAPI RtApiWasapi::stopWasapiThread( void* wasapiPtr )
+{
+  if ( wasapiPtr )
+    ( ( RtApiWasapi* ) wasapiPtr )->stopStream();
+
+  return 0;
+}
+
+DWORD WINAPI RtApiWasapi::abortWasapiThread( void* wasapiPtr )
+{
+  if ( wasapiPtr )
+    ( ( RtApiWasapi* ) wasapiPtr )->abortStream();
+
+  return 0;
+}
+
+//-----------------------------------------------------------------------------
+
+void RtApiWasapi::wasapiThread()
+{
+  // as this is a new thread, we must CoInitialize it
+  CoInitialize( NULL );
+
+  HRESULT hr;
+
+  IAudioClient* captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
+  IAudioClient* renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
+  IAudioCaptureClient* captureClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureClient;
+  IAudioRenderClient* renderClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderClient;
+  HANDLE captureEvent = ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent;
+  HANDLE renderEvent = ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent;
+
+  WAVEFORMATEX* captureFormat = NULL;
+  WAVEFORMATEX* renderFormat = NULL;
+  float captureSrRatio = 0.0f;
+  float renderSrRatio = 0.0f;
+  WasapiBuffer captureBuffer;
+  WasapiBuffer renderBuffer;
+
+  // declare local stream variables
+  RtAudioCallback callback = ( RtAudioCallback ) stream_.callbackInfo.callback;
+  BYTE* streamBuffer = NULL;
+  unsigned long captureFlags = 0;
+  unsigned int bufferFrameCount = 0;
+  unsigned int numFramesPadding = 0;
+  unsigned int convBufferSize = 0;
+  bool callbackPushed = false;
+  bool callbackPulled = false;
+  bool callbackStopped = false;
+  int callbackResult = 0;
+
+  // convBuffer is used to store converted buffers between WASAPI and the user
+  char* convBuffer = NULL;
+  unsigned int convBuffSize = 0;
+  unsigned int deviceBuffSize = 0;
+
+  errorText_.clear();
+  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
+
+  // Attempt to assign "Pro Audio" characteristic to thread
+  HMODULE AvrtDll = LoadLibrary( (LPCTSTR) "AVRT.dll" );
+  if ( AvrtDll ) {
+    DWORD taskIndex = 0;
+    TAvSetMmThreadCharacteristicsPtr AvSetMmThreadCharacteristicsPtr = ( TAvSetMmThreadCharacteristicsPtr ) GetProcAddress( AvrtDll, "AvSetMmThreadCharacteristicsW" );
+    AvSetMmThreadCharacteristicsPtr( L"Pro Audio", &taskIndex );
+    FreeLibrary( AvrtDll );
+  }
+
+  // start capture stream if applicable
+  if ( captureAudioClient ) {
+    hr = captureAudioClient->GetMixFormat( &captureFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    captureSrRatio = ( ( float ) captureFormat->nSamplesPerSec / stream_.sampleRate );
+
+    // initialize capture stream according to desire buffer size
+    float desiredBufferSize = stream_.bufferSize * captureSrRatio;
+    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / captureFormat->nSamplesPerSec );
+
+    if ( !captureClient ) {
+      hr = captureAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
+                                           AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
+                                           desiredBufferPeriod,
+                                           desiredBufferPeriod,
+                                           captureFormat,
+                                           NULL );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize capture audio client.";
+        goto Exit;
+      }
+
+      hr = captureAudioClient->GetService( __uuidof( IAudioCaptureClient ),
+                                           ( void** ) &captureClient );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture client handle.";
+        goto Exit;
+      }
+
+      // configure captureEvent to trigger on every available capture buffer
+      captureEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
+      if ( !captureEvent ) {
+        errorType = RtAudioError::SYSTEM_ERROR;
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to create capture event.";
+        goto Exit;
+      }
+
+      hr = captureAudioClient->SetEventHandle( captureEvent );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to set capture event handle.";
+        goto Exit;
+      }
+
+      ( ( WasapiHandle* ) stream_.apiHandle )->captureClient = captureClient;
+      ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent = captureEvent;
+    }
+
+    unsigned int inBufferSize = 0;
+    hr = captureAudioClient->GetBufferSize( &inBufferSize );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to get capture buffer size.";
+      goto Exit;
+    }
+
+    // scale outBufferSize according to stream->user sample rate ratio
+    unsigned int outBufferSize = ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT];
+    inBufferSize *= stream_.nDeviceChannels[INPUT];
+
+    // set captureBuffer size
+    captureBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[INPUT] ) );
+
+    // reset the capture stream
+    hr = captureAudioClient->Reset();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset capture stream.";
+      goto Exit;
+    }
+
+    // start the capture stream
+    hr = captureAudioClient->Start();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to start capture stream.";
+      goto Exit;
+    }
+  }
+
+  // start render stream if applicable
+  if ( renderAudioClient ) {
+    hr = renderAudioClient->GetMixFormat( &renderFormat );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
+      goto Exit;
+    }
+
+    renderSrRatio = ( ( float ) renderFormat->nSamplesPerSec / stream_.sampleRate );
+
+    // initialize render stream according to desire buffer size
+    float desiredBufferSize = stream_.bufferSize * renderSrRatio;
+    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / renderFormat->nSamplesPerSec );
+
+    if ( !renderClient ) {
+      hr = renderAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
+                                          AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
+                                          desiredBufferPeriod,
+                                          desiredBufferPeriod,
+                                          renderFormat,
+                                          NULL );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize render audio client.";
+        goto Exit;
+      }
+
+      hr = renderAudioClient->GetService( __uuidof( IAudioRenderClient ),
+                                          ( void** ) &renderClient );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render client handle.";
+        goto Exit;
+      }
+
+      // configure renderEvent to trigger on every available render buffer
+      renderEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
+      if ( !renderEvent ) {
+        errorType = RtAudioError::SYSTEM_ERROR;
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to create render event.";
+        goto Exit;
+      }
+
+      hr = renderAudioClient->SetEventHandle( renderEvent );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to set render event handle.";
+        goto Exit;
+      }
+
+      ( ( WasapiHandle* ) stream_.apiHandle )->renderClient = renderClient;
+      ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent = renderEvent;
+    }
+
+    unsigned int outBufferSize = 0;
+    hr = renderAudioClient->GetBufferSize( &outBufferSize );
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to get render buffer size.";
+      goto Exit;
+    }
+
+    // scale inBufferSize according to user->stream sample rate ratio
+    unsigned int inBufferSize = ( unsigned int ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT];
+    outBufferSize *= stream_.nDeviceChannels[OUTPUT];
+
+    // set renderBuffer size
+    renderBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[OUTPUT] ) );
+
+    // reset the render stream
+    hr = renderAudioClient->Reset();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset render stream.";
+      goto Exit;
+    }
+
+    // start the render stream
+    hr = renderAudioClient->Start();
+    if ( FAILED( hr ) ) {
+      errorText_ = "RtApiWasapi::wasapiThread: Unable to start render stream.";
+      goto Exit;
+    }
+  }
+
+  if ( stream_.mode == INPUT ) {
+    convBuffSize = ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
+    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
+  }
+  else if ( stream_.mode == OUTPUT ) {
+    convBuffSize = ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
+    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
+  }
+  else if ( stream_.mode == DUPLEX ) {
+    convBuffSize = std::max( ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
+                             ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
+    deviceBuffSize = std::max( stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
+                               stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
+  }
+
+  convBuffer = ( char* ) malloc( convBuffSize );
+  stream_.deviceBuffer = ( char* ) malloc( deviceBuffSize );
+  if ( !convBuffer || !stream_.deviceBuffer ) {
+    errorType = RtAudioError::MEMORY_ERROR;
+    errorText_ = "RtApiWasapi::wasapiThread: Error allocating device buffer memory.";
+    goto Exit;
+  }
+
+  // stream process loop
+  while ( stream_.state != STREAM_STOPPING ) {
+    if ( !callbackPulled ) {
+      // Callback Input
+      // ==============
+      // 1. Pull callback buffer from inputBuffer
+      // 2. If 1. was successful: Convert callback buffer to user sample rate and channel count
+      //                          Convert callback buffer to user format
+
+      if ( captureAudioClient ) {
+        // Pull callback buffer from inputBuffer
+        callbackPulled = captureBuffer.pullBuffer( convBuffer,
+                                                   ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT],
+                                                   stream_.deviceFormat[INPUT] );
+
+        if ( callbackPulled ) {
+          // Convert callback buffer to user sample rate
+          convertBufferWasapi( stream_.deviceBuffer,
+                               convBuffer,
+                               stream_.nDeviceChannels[INPUT],
+                               captureFormat->nSamplesPerSec,
+                               stream_.sampleRate,
+                               ( unsigned int ) ( stream_.bufferSize * captureSrRatio ),
+                               convBufferSize,
+                               stream_.deviceFormat[INPUT] );
+
+          if ( stream_.doConvertBuffer[INPUT] ) {
+            // Convert callback buffer to user format
+            convertBuffer( stream_.userBuffer[INPUT],
+                           stream_.deviceBuffer,
+                           stream_.convertInfo[INPUT] );
+          }
+          else {
+            // no further conversion, simple copy deviceBuffer to userBuffer
+            memcpy( stream_.userBuffer[INPUT],
+                    stream_.deviceBuffer,
+                    stream_.bufferSize * stream_.nUserChannels[INPUT] * formatBytes( stream_.userFormat ) );
+          }
+        }
+      }
+      else {
+        // if there is no capture stream, set callbackPulled flag
+        callbackPulled = true;
+      }
+
+      // Execute Callback
+      // ================
+      // 1. Execute user callback method
+      // 2. Handle return value from callback
+
+      // if callback has not requested the stream to stop
+      if ( callbackPulled && !callbackStopped ) {
+        // Execute user callback method
+        callbackResult = callback( stream_.userBuffer[OUTPUT],
+                                   stream_.userBuffer[INPUT],
+                                   stream_.bufferSize,
+                                   getStreamTime(),
+                                   captureFlags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY ? RTAUDIO_INPUT_OVERFLOW : 0,
+                                   stream_.callbackInfo.userData );
+
+        // Handle return value from callback
+        if ( callbackResult == 1 ) {
+          // instantiate a thread to stop this thread
+          HANDLE threadHandle = CreateThread( NULL, 0, stopWasapiThread, this, 0, NULL );
+          if ( !threadHandle ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream stop thread.";
+            goto Exit;
+          }
+          else if ( !CloseHandle( threadHandle ) ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream stop thread handle.";
+            goto Exit;
+          }
+
+          callbackStopped = true;
+        }
+        else if ( callbackResult == 2 ) {
+          // instantiate a thread to stop this thread
+          HANDLE threadHandle = CreateThread( NULL, 0, abortWasapiThread, this, 0, NULL );
+          if ( !threadHandle ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream abort thread.";
+            goto Exit;
+          }
+          else if ( !CloseHandle( threadHandle ) ) {
+            errorType = RtAudioError::THREAD_ERROR;
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream abort thread handle.";
+            goto Exit;
+          }
+
+          callbackStopped = true;
+        }
+      }
+    }
+
+    // Callback Output
+    // ===============
+    // 1. Convert callback buffer to stream format
+    // 2. Convert callback buffer to stream sample rate and channel count
+    // 3. Push callback buffer into outputBuffer
+
+    if ( renderAudioClient && callbackPulled ) {
+      if ( stream_.doConvertBuffer[OUTPUT] ) {
+        // Convert callback buffer to stream format
+        convertBuffer( stream_.deviceBuffer,
+                       stream_.userBuffer[OUTPUT],
+                       stream_.convertInfo[OUTPUT] );
+
+      }
+
+      // Convert callback buffer to stream sample rate
+      convertBufferWasapi( convBuffer,
+                           stream_.deviceBuffer,
+                           stream_.nDeviceChannels[OUTPUT],
+                           stream_.sampleRate,
+                           renderFormat->nSamplesPerSec,
+                           stream_.bufferSize,
+                           convBufferSize,
+                           stream_.deviceFormat[OUTPUT] );
+
+      // Push callback buffer into outputBuffer
+      callbackPushed = renderBuffer.pushBuffer( convBuffer,
+                                                convBufferSize * stream_.nDeviceChannels[OUTPUT],
+                                                stream_.deviceFormat[OUTPUT] );
+    }
+    else {
+      // if there is no render stream, set callbackPushed flag
+      callbackPushed = true;
+    }
+
+    // Stream Capture
+    // ==============
+    // 1. Get capture buffer from stream
+    // 2. Push capture buffer into inputBuffer
+    // 3. If 2. was successful: Release capture buffer
+
+    if ( captureAudioClient ) {
+      // if the callback input buffer was not pulled from captureBuffer, wait for next capture event
+      if ( !callbackPulled ) {
+        WaitForSingleObject( captureEvent, INFINITE );
+      }
+
+      // Get capture buffer from stream
+      hr = captureClient->GetBuffer( &streamBuffer,
+                                     &bufferFrameCount,
+                                     &captureFlags, NULL, NULL );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture buffer.";
+        goto Exit;
+      }
+
+      if ( bufferFrameCount != 0 ) {
+        // Push capture buffer into inputBuffer
+        if ( captureBuffer.pushBuffer( ( char* ) streamBuffer,
+                                       bufferFrameCount * stream_.nDeviceChannels[INPUT],
+                                       stream_.deviceFormat[INPUT] ) )
+        {
+          // Release capture buffer
+          hr = captureClient->ReleaseBuffer( bufferFrameCount );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
+            goto Exit;
+          }
+        }
+        else
+        {
+          // Inform WASAPI that capture was unsuccessful
+          hr = captureClient->ReleaseBuffer( 0 );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
+            goto Exit;
+          }
+        }
+      }
+      else
+      {
+        // Inform WASAPI that capture was unsuccessful
+        hr = captureClient->ReleaseBuffer( 0 );
+        if ( FAILED( hr ) ) {
+          errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
+          goto Exit;
+        }
+      }
+    }
+
+    // Stream Render
+    // =============
+    // 1. Get render buffer from stream
+    // 2. Pull next buffer from outputBuffer
+    // 3. If 2. was successful: Fill render buffer with next buffer
+    //                          Release render buffer
+
+    if ( renderAudioClient ) {
+      // if the callback output buffer was not pushed to renderBuffer, wait for next render event
+      if ( callbackPulled && !callbackPushed ) {
+        WaitForSingleObject( renderEvent, INFINITE );
+      }
+
+      // Get render buffer from stream
+      hr = renderAudioClient->GetBufferSize( &bufferFrameCount );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer size.";
+        goto Exit;
+      }
+
+      hr = renderAudioClient->GetCurrentPadding( &numFramesPadding );
+      if ( FAILED( hr ) ) {
+        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer padding.";
+        goto Exit;
+      }
+
+      bufferFrameCount -= numFramesPadding;
+
+      if ( bufferFrameCount != 0 ) {
+        hr = renderClient->GetBuffer( bufferFrameCount, &streamBuffer );
+        if ( FAILED( hr ) ) {
+          errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer.";
+          goto Exit;
+        }
+
+        // Pull next buffer from outputBuffer
+        // Fill render buffer with next buffer
+        if ( renderBuffer.pullBuffer( ( char* ) streamBuffer,
+                                      bufferFrameCount * stream_.nDeviceChannels[OUTPUT],
+                                      stream_.deviceFormat[OUTPUT] ) )
+        {
+          // Release render buffer
+          hr = renderClient->ReleaseBuffer( bufferFrameCount, 0 );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
+            goto Exit;
+          }
+        }
+        else
+        {
+          // Inform WASAPI that render was unsuccessful
+          hr = renderClient->ReleaseBuffer( 0, 0 );
+          if ( FAILED( hr ) ) {
+            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
+            goto Exit;
+          }
+        }
+      }
+      else
+      {
+        // Inform WASAPI that render was unsuccessful
+        hr = renderClient->ReleaseBuffer( 0, 0 );
+        if ( FAILED( hr ) ) {
+          errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
+          goto Exit;
+        }
+      }
+    }
+
+    // if the callback buffer was pushed renderBuffer reset callbackPulled flag
+    if ( callbackPushed ) {
+      callbackPulled = false;
+    }
+
+    // tick stream time
+    RtApi::tickStreamTime();
+  }
+
+Exit:
+  // clean up
+  CoTaskMemFree( captureFormat );
+  CoTaskMemFree( renderFormat );
+
+  free ( convBuffer );
+
+  CoUninitialize();
+
+  // update stream state
+  stream_.state = STREAM_STOPPED;
+
+  if ( errorText_.empty() )
+    return;
+  else
+    error( errorType );
+}
+
+//******************** End of __WINDOWS_WASAPI__ *********************//
+#endif
+
+
+#if defined(__WINDOWS_DS__) // Windows DirectSound API
+
+// Modified by Robin Davies, October 2005
+// - Improvements to DirectX pointer chasing. 
+// - Bug fix for non-power-of-two Asio granularity used by Edirol PCR-A30.
+// - Auto-call CoInitialize for DSOUND and ASIO platforms.
+// Various revisions for RtAudio 4.0 by Gary Scavone, April 2007
+// Changed device query structure for RtAudio 4.0.7, January 2010
+
+#include <dsound.h>
+#include <assert.h>
+#include <algorithm>
+
+#if defined(__MINGW32__)
+  // missing from latest mingw winapi
+#define WAVE_FORMAT_96M08 0x00010000 /* 96 kHz, Mono, 8-bit */
+#define WAVE_FORMAT_96S08 0x00020000 /* 96 kHz, Stereo, 8-bit */
+#define WAVE_FORMAT_96M16 0x00040000 /* 96 kHz, Mono, 16-bit */
+#define WAVE_FORMAT_96S16 0x00080000 /* 96 kHz, Stereo, 16-bit */
+#endif
+
+#define MINIMUM_DEVICE_BUFFER_SIZE 32768
+
+#ifdef _MSC_VER // if Microsoft Visual C++
+#pragma comment( lib, "winmm.lib" ) // then, auto-link winmm.lib. Otherwise, it has to be added manually.
+#endif
+
+static inline DWORD dsPointerBetween( DWORD pointer, DWORD laterPointer, DWORD earlierPointer, DWORD bufferSize )
+{
+  if ( pointer > bufferSize ) pointer -= bufferSize;
+  if ( laterPointer < earlierPointer ) laterPointer += bufferSize;
+  if ( pointer < earlierPointer ) pointer += bufferSize;
+  return pointer >= earlierPointer && pointer < laterPointer;
+}
+
+// A structure to hold various information related to the DirectSound
+// API implementation.
+struct DsHandle {
+  unsigned int drainCounter; // Tracks callback counts when draining
+  bool internalDrain;        // Indicates if stop is initiated from callback or not.
+  void *id[2];
+  void *buffer[2];
+  bool xrun[2];
+  UINT bufferPointer[2];  
+  DWORD dsBufferSize[2];
+  DWORD dsPointerLeadTime[2]; // the number of bytes ahead of the safe pointer to lead by.
+  HANDLE condition;
+
+  DsHandle()
+    :drainCounter(0), internalDrain(false) { id[0] = 0; id[1] = 0; buffer[0] = 0; buffer[1] = 0; xrun[0] = false; xrun[1] = false; bufferPointer[0] = 0; bufferPointer[1] = 0; }
+};
+
+// Declarations for utility functions, callbacks, and structures
+// specific to the DirectSound implementation.
+static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
+                                          LPCTSTR description,
+                                          LPCTSTR module,
+                                          LPVOID lpContext );
+
+static const char* getErrorString( int code );
+
+static unsigned __stdcall callbackHandler( void *ptr );
+
+struct DsDevice {
+  LPGUID id[2];
+  bool validId[2];
+  bool found;
+  std::string name;
+
+  DsDevice()
+  : found(false) { validId[0] = false; validId[1] = false; }
+};
+
+struct DsProbeData {
+  bool isInput;
+  std::vector<struct DsDevice>* dsDevices;
+};
+
+RtApiDs :: RtApiDs()
+{
+  // Dsound will run both-threaded. If CoInitialize fails, then just
+  // accept whatever the mainline chose for a threading model.
+  coInitialized_ = false;
+  HRESULT hr = CoInitialize( NULL );
+  if ( !FAILED( hr ) ) coInitialized_ = true;
+}
+
+RtApiDs :: ~RtApiDs()
+{
+  if ( coInitialized_ ) CoUninitialize(); // balanced call.
+  if ( stream_.state != STREAM_CLOSED ) closeStream();
+}
+
+// The DirectSound default output is always the first device.
+unsigned int RtApiDs :: getDefaultOutputDevice( void )
+{
+  return 0;
+}
+
+// The DirectSound default input is always the first input device,
+// which is the first capture device enumerated.
+unsigned int RtApiDs :: getDefaultInputDevice( void )
+{
+  return 0;
+}
+
+unsigned int RtApiDs :: getDeviceCount( void )
+{
+  // Set query flag for previously found devices to false, so that we
+  // can check for any devices that have disappeared.
+  for ( unsigned int i=0; i<dsDevices.size(); i++ )
+    dsDevices[i].found = false;
+
+  // Query DirectSound devices.
+  struct DsProbeData probeInfo;
+  probeInfo.isInput = false;
+  probeInfo.dsDevices = &dsDevices;
+  HRESULT result = DirectSoundEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating output devices!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+  }
+
+  // Query DirectSoundCapture devices.
+  probeInfo.isInput = true;
+  result = DirectSoundCaptureEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating input devices!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+  }
+
+  // Clean out any devices that may have disappeared (code update submitted by Eli Zehngut).
+  for ( unsigned int i=0; i<dsDevices.size(); ) {
+    if ( dsDevices[i].found == false ) dsDevices.erase( dsDevices.begin() + i );
+    else i++;
+  }
+
+  return static_cast<unsigned int>(dsDevices.size());
+}
+
+RtAudio::DeviceInfo RtApiDs :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  if ( dsDevices.size() == 0 ) {
+    // Force a query of all devices
+    getDeviceCount();
+    if ( dsDevices.size() == 0 ) {
+      errorText_ = "RtApiDs::getDeviceInfo: no devices found!";
+      error( RtAudioError::INVALID_USE );
+      return info;
+    }
+  }
+
+  if ( device >= dsDevices.size() ) {
+    errorText_ = "RtApiDs::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  HRESULT result;
+  if ( dsDevices[ device ].validId[0] == false ) goto probeInput;
+
+  LPDIRECTSOUND output;
+  DSCAPS outCaps;
+  result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto probeInput;
+  }
+
+  outCaps.dwSize = sizeof( outCaps );
+  result = output->GetCaps( &outCaps );
+  if ( FAILED( result ) ) {
+    output->Release();
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting capabilities!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto probeInput;
+  }
+
+  // Get output channel information.
+  info.outputChannels = ( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ? 2 : 1;
+
+  // Get sample rate information.
+  info.sampleRates.clear();
+  for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+    if ( SAMPLE_RATES[k] >= (unsigned int) outCaps.dwMinSecondarySampleRate &&
+         SAMPLE_RATES[k] <= (unsigned int) outCaps.dwMaxSecondarySampleRate ) {
+      info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+      if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+        info.preferredSampleRate = SAMPLE_RATES[k];
+    }
+  }
+
+  // Get format information.
+  if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT ) info.nativeFormats |= RTAUDIO_SINT16;
+  if ( outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) info.nativeFormats |= RTAUDIO_SINT8;
+
+  output->Release();
+
+  if ( getDefaultOutputDevice() == device )
+    info.isDefaultOutput = true;
+
+  if ( dsDevices[ device ].validId[1] == false ) {
+    info.name = dsDevices[ device ].name;
+    info.probed = true;
+    return info;
+  }
+
+ probeInput:
+
+  LPDIRECTSOUNDCAPTURE input;
+  result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
+  if ( FAILED( result ) ) {
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  DSCCAPS inCaps;
+  inCaps.dwSize = sizeof( inCaps );
+  result = input->GetCaps( &inCaps );
+  if ( FAILED( result ) ) {
+    input->Release();
+    errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting object capabilities (" << dsDevices[ device ].name << ")!";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get input channel information.
+  info.inputChannels = inCaps.dwChannels;
+
+  // Get sample rate and format information.
+  std::vector<unsigned int> rates;
+  if ( inCaps.dwChannels >= 2 ) {
+    if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) info.nativeFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) info.nativeFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) info.nativeFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) info.nativeFormats |= RTAUDIO_SINT8;
+
+    if ( info.nativeFormats & RTAUDIO_SINT16 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) rates.push_back( 96000 );
+    }
+    else if ( info.nativeFormats & RTAUDIO_SINT8 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) rates.push_back( 96000 );
+    }
+  }
+  else if ( inCaps.dwChannels == 1 ) {
+    if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) info.nativeFormats |= RTAUDIO_SINT16;
+    if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) info.nativeFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) info.nativeFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) info.nativeFormats |= RTAUDIO_SINT8;
+    if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) info.nativeFormats |= RTAUDIO_SINT8;
+
+    if ( info.nativeFormats & RTAUDIO_SINT16 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) rates.push_back( 96000 );
+    }
+    else if ( info.nativeFormats & RTAUDIO_SINT8 ) {
+      if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) rates.push_back( 11025 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) rates.push_back( 22050 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) rates.push_back( 44100 );
+      if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) rates.push_back( 96000 );
+    }
+  }
+  else info.inputChannels = 0; // technically, this would be an error
+
+  input->Release();
+
+  if ( info.inputChannels == 0 ) return info;
+
+  // Copy the supported rates to the info structure but avoid duplication.
+  bool found;
+  for ( unsigned int i=0; i<rates.size(); i++ ) {
+    found = false;
+    for ( unsigned int j=0; j<info.sampleRates.size(); j++ ) {
+      if ( rates[i] == info.sampleRates[j] ) {
+        found = true;
+        break;
+      }
+    }
+    if ( found == false ) info.sampleRates.push_back( rates[i] );
+  }
+  std::sort( info.sampleRates.begin(), info.sampleRates.end() );
+
+  // If device opens for both playback and capture, we determine the channels.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  if ( device == 0 ) info.isDefaultInput = true;
+
+  // Copy name and return.
+  info.name = dsDevices[ device ].name;
+  info.probed = true;
+  return info;
+}
+
+bool RtApiDs :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                 unsigned int firstChannel, unsigned int sampleRate,
+                                 RtAudioFormat format, unsigned int *bufferSize,
+                                 RtAudio::StreamOptions *options )
+{
+  if ( channels + firstChannel > 2 ) {
+    errorText_ = "RtApiDs::probeDeviceOpen: DirectSound does not support more than 2 channels per device.";
+    return FAILURE;
+  }
+
+  size_t nDevices = dsDevices.size();
+  if ( nDevices == 0 ) {
+    // This should not happen because a check is made before this function is called.
+    errorText_ = "RtApiDs::probeDeviceOpen: no devices found!";
+    return FAILURE;
+  }
+
+  if ( device >= nDevices ) {
+    // This should not happen because a check is made before this function is called.
+    errorText_ = "RtApiDs::probeDeviceOpen: device ID is invalid!";
+    return FAILURE;
+  }
+
+  if ( mode == OUTPUT ) {
+    if ( dsDevices[ device ].validId[0] == false ) {
+      errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support output!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+  else { // mode == INPUT
+    if ( dsDevices[ device ].validId[1] == false ) {
+      errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support input!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // According to a note in PortAudio, using GetDesktopWindow()
+  // instead of GetForegroundWindow() is supposed to avoid problems
+  // that occur when the application's window is not the foreground
+  // window.  Also, if the application window closes before the
+  // DirectSound buffer, DirectSound can crash.  In the past, I had
+  // problems when using GetDesktopWindow() but it seems fine now
+  // (January 2010).  I'll leave it commented here.
+  // HWND hWnd = GetForegroundWindow();
+  HWND hWnd = GetDesktopWindow();
+
+  // Check the numberOfBuffers parameter and limit the lowest value to
+  // two.  This is a judgement call and a value of two is probably too
+  // low for capture, but it should work for playback.
+  int nBuffers = 0;
+  if ( options ) nBuffers = options->numberOfBuffers;
+  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) nBuffers = 2;
+  if ( nBuffers < 2 ) nBuffers = 3;
+
+  // Check the lower range of the user-specified buffer size and set
+  // (arbitrarily) to a lower bound of 32.
+  if ( *bufferSize < 32 ) *bufferSize = 32;
+
+  // Create the wave format structure.  The data format setting will
+  // be determined later.
+  WAVEFORMATEX waveFormat;
+  ZeroMemory( &waveFormat, sizeof(WAVEFORMATEX) );
+  waveFormat.wFormatTag = WAVE_FORMAT_PCM;
+  waveFormat.nChannels = channels + firstChannel;
+  waveFormat.nSamplesPerSec = (unsigned long) sampleRate;
+
+  // Determine the device buffer size. By default, we'll use the value
+  // defined above (32K), but we will grow it to make allowances for
+  // very large software buffer sizes.
+  DWORD dsBufferSize = MINIMUM_DEVICE_BUFFER_SIZE;
+  DWORD dsPointerLeadTime = 0;
+
+  void *ohandle = 0, *bhandle = 0;
+  HRESULT result;
+  if ( mode == OUTPUT ) {
+
+    LPDIRECTSOUND output;
+    result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    DSCAPS outCaps;
+    outCaps.dwSize = sizeof( outCaps );
+    result = output->GetCaps( &outCaps );
+    if ( FAILED( result ) ) {
+      output->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting capabilities (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Check channel information.
+    if ( channels + firstChannel == 2 && !( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ) {
+      errorStream_ << "RtApiDs::getDeviceInfo: the output device (" << dsDevices[ device ].name << ") does not support stereo playback.";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Check format information.  Use 16-bit format unless not
+    // supported or user requests 8-bit.
+    if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT &&
+         !( format == RTAUDIO_SINT8 && outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) ) {
+      waveFormat.wBitsPerSample = 16;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+    }
+    else {
+      waveFormat.wBitsPerSample = 8;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+    }
+    stream_.userFormat = format;
+
+    // Update wave format structure and buffer information.
+    waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
+    waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
+    dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
+
+    // If the user wants an even bigger buffer, increase the device buffer size accordingly.
+    while ( dsPointerLeadTime * 2U > dsBufferSize )
+      dsBufferSize *= 2;
+
+    // Set cooperative level to DSSCL_EXCLUSIVE ... sound stops when window focus changes.
+    // result = output->SetCooperativeLevel( hWnd, DSSCL_EXCLUSIVE );
+    // Set cooperative level to DSSCL_PRIORITY ... sound remains when window focus changes.
+    result = output->SetCooperativeLevel( hWnd, DSSCL_PRIORITY );
+    if ( FAILED( result ) ) {
+      output->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting cooperative level (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Even though we will write to the secondary buffer, we need to
+    // access the primary buffer to set the correct output format
+    // (since the default is 8-bit, 22 kHz!).  Setup the DS primary
+    // buffer description.
+    DSBUFFERDESC bufferDescription;
+    ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
+    bufferDescription.dwSize = sizeof( DSBUFFERDESC );
+    bufferDescription.dwFlags = DSBCAPS_PRIMARYBUFFER;
+
+    // Obtain the primary buffer
+    LPDIRECTSOUNDBUFFER buffer;
+    result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
+    if ( FAILED( result ) ) {
+      output->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") accessing primary buffer (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Set the primary DS buffer sound format.
+    result = buffer->SetFormat( &waveFormat );
+    if ( FAILED( result ) ) {
+      output->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting primary buffer format (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Setup the secondary DS buffer description.
+    ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
+    bufferDescription.dwSize = sizeof( DSBUFFERDESC );
+    bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
+                                  DSBCAPS_GLOBALFOCUS |
+                                  DSBCAPS_GETCURRENTPOSITION2 |
+                                  DSBCAPS_LOCHARDWARE );  // Force hardware mixing
+    bufferDescription.dwBufferBytes = dsBufferSize;
+    bufferDescription.lpwfxFormat = &waveFormat;
+
+    // Try to create the secondary DS buffer.  If that doesn't work,
+    // try to use software mixing.  Otherwise, there's a problem.
+    result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
+    if ( FAILED( result ) ) {
+      bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
+                                    DSBCAPS_GLOBALFOCUS |
+                                    DSBCAPS_GETCURRENTPOSITION2 |
+                                    DSBCAPS_LOCSOFTWARE );  // Force software mixing
+      result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
+      if ( FAILED( result ) ) {
+        output->Release();
+        errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating secondary buffer (" << dsDevices[ device ].name << ")!";
+        errorText_ = errorStream_.str();
+        return FAILURE;
+      }
+    }
+
+    // Get the buffer size ... might be different from what we specified.
+    DSBCAPS dsbcaps;
+    dsbcaps.dwSize = sizeof( DSBCAPS );
+    result = buffer->GetCaps( &dsbcaps );
+    if ( FAILED( result ) ) {
+      output->Release();
+      buffer->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    dsBufferSize = dsbcaps.dwBufferBytes;
+
+    // Lock the DS buffer
+    LPVOID audioPtr;
+    DWORD dataLen;
+    result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
+    if ( FAILED( result ) ) {
+      output->Release();
+      buffer->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking buffer (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Zero the DS buffer
+    ZeroMemory( audioPtr, dataLen );
+
+    // Unlock the DS buffer
+    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
+    if ( FAILED( result ) ) {
+      output->Release();
+      buffer->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking buffer (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    ohandle = (void *) output;
+    bhandle = (void *) buffer;
+  }
+
+  if ( mode == INPUT ) {
+
+    LPDIRECTSOUNDCAPTURE input;
+    result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    DSCCAPS inCaps;
+    inCaps.dwSize = sizeof( inCaps );
+    result = input->GetCaps( &inCaps );
+    if ( FAILED( result ) ) {
+      input->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting input capabilities (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Check channel information.
+    if ( inCaps.dwChannels < channels + firstChannel ) {
+      errorText_ = "RtApiDs::getDeviceInfo: the input device does not support requested input channels.";
+      return FAILURE;
+    }
+
+    // Check format information.  Use 16-bit format unless user
+    // requests 8-bit.
+    DWORD deviceFormats;
+    if ( channels + firstChannel == 2 ) {
+      deviceFormats = WAVE_FORMAT_1S08 | WAVE_FORMAT_2S08 | WAVE_FORMAT_4S08 | WAVE_FORMAT_96S08;
+      if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
+        waveFormat.wBitsPerSample = 8;
+        stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+      }
+      else { // assume 16-bit is supported
+        waveFormat.wBitsPerSample = 16;
+        stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+      }
+    }
+    else { // channel == 1
+      deviceFormats = WAVE_FORMAT_1M08 | WAVE_FORMAT_2M08 | WAVE_FORMAT_4M08 | WAVE_FORMAT_96M08;
+      if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
+        waveFormat.wBitsPerSample = 8;
+        stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+      }
+      else { // assume 16-bit is supported
+        waveFormat.wBitsPerSample = 16;
+        stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+      }
+    }
+    stream_.userFormat = format;
+
+    // Update wave format structure and buffer information.
+    waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
+    waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
+    dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
+
+    // If the user wants an even bigger buffer, increase the device buffer size accordingly.
+    while ( dsPointerLeadTime * 2U > dsBufferSize )
+      dsBufferSize *= 2;
+
+    // Setup the secondary DS buffer description.
+    DSCBUFFERDESC bufferDescription;
+    ZeroMemory( &bufferDescription, sizeof( DSCBUFFERDESC ) );
+    bufferDescription.dwSize = sizeof( DSCBUFFERDESC );
+    bufferDescription.dwFlags = 0;
+    bufferDescription.dwReserved = 0;
+    bufferDescription.dwBufferBytes = dsBufferSize;
+    bufferDescription.lpwfxFormat = &waveFormat;
+
+    // Create the capture buffer.
+    LPDIRECTSOUNDCAPTUREBUFFER buffer;
+    result = input->CreateCaptureBuffer( &bufferDescription, &buffer, NULL );
+    if ( FAILED( result ) ) {
+      input->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating input buffer (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Get the buffer size ... might be different from what we specified.
+    DSCBCAPS dscbcaps;
+    dscbcaps.dwSize = sizeof( DSCBCAPS );
+    result = buffer->GetCaps( &dscbcaps );
+    if ( FAILED( result ) ) {
+      input->Release();
+      buffer->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    dsBufferSize = dscbcaps.dwBufferBytes;
+
+    // NOTE: We could have a problem here if this is a duplex stream
+    // and the play and capture hardware buffer sizes are different
+    // (I'm actually not sure if that is a problem or not).
+    // Currently, we are not verifying that.
+
+    // Lock the capture buffer
+    LPVOID audioPtr;
+    DWORD dataLen;
+    result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
+    if ( FAILED( result ) ) {
+      input->Release();
+      buffer->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking input buffer (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    // Zero the buffer
+    ZeroMemory( audioPtr, dataLen );
+
+    // Unlock the buffer
+    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
+    if ( FAILED( result ) ) {
+      input->Release();
+      buffer->Release();
+      errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking input buffer (" << dsDevices[ device ].name << ")!";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+
+    ohandle = (void *) input;
+    bhandle = (void *) buffer;
+  }
+
+  // Set various stream parameters
+  DsHandle *handle = 0;
+  stream_.nDeviceChannels[mode] = channels + firstChannel;
+  stream_.nUserChannels[mode] = channels;
+  stream_.bufferSize = *bufferSize;
+  stream_.channelOffset[mode] = firstChannel;
+  stream_.deviceInterleaved[mode] = true;
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+
+  // Set flag for buffer conversion
+  stream_.doConvertBuffer[mode] = false;
+  if (stream_.nUserChannels[mode] != stream_.nDeviceChannels[mode])
+    stream_.doConvertBuffer[mode] = true;
+  if (stream_.userFormat != stream_.deviceFormat[mode])
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+       stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate necessary internal buffers
+  long bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiDs::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( mode == INPUT ) {
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+        if ( bufferBytes <= (long) bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiDs::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  // Allocate our DsHandle structures for the stream.
+  if ( stream_.apiHandle == 0 ) {
+    try {
+      handle = new DsHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiDs::probeDeviceOpen: error allocating AsioHandle memory.";
+      goto error;
+    }
+
+    // Create a manual-reset event.
+    handle->condition = CreateEvent( NULL,   // no security
+                                     TRUE,   // manual-reset
+                                     FALSE,  // non-signaled initially
+                                     NULL ); // unnamed
+    stream_.apiHandle = (void *) handle;
+  }
+  else
+    handle = (DsHandle *) stream_.apiHandle;
+  handle->id[mode] = ohandle;
+  handle->buffer[mode] = bhandle;
+  handle->dsBufferSize[mode] = dsBufferSize;
+  handle->dsPointerLeadTime[mode] = dsPointerLeadTime;
+
+  stream_.device[mode] = device;
+  stream_.state = STREAM_STOPPED;
+  if ( stream_.mode == OUTPUT && mode == INPUT )
+    // We had already set up an output stream.
+    stream_.mode = DUPLEX;
+  else
+    stream_.mode = mode;
+  stream_.nBuffers = nBuffers;
+  stream_.sampleRate = sampleRate;
+
+  // Setup the buffer conversion information structure.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+  // Setup the callback thread.
+  if ( stream_.callbackInfo.isRunning == false ) {
+    unsigned threadId;
+    stream_.callbackInfo.isRunning = true;
+    stream_.callbackInfo.object = (void *) this;
+    stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &callbackHandler,
+                                                  &stream_.callbackInfo, 0, &threadId );
+    if ( stream_.callbackInfo.thread == 0 ) {
+      errorText_ = "RtApiDs::probeDeviceOpen: error creating callback thread!";
+      goto error;
+    }
+
+    // Boost DS thread priority
+    SetThreadPriority( (HANDLE) stream_.callbackInfo.thread, THREAD_PRIORITY_HIGHEST );
+  }
+  return SUCCESS;
+
+ error:
+  if ( handle ) {
+    if ( handle->buffer[0] ) { // the object pointer can be NULL and valid
+      LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
+      LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+      if ( buffer ) buffer->Release();
+      object->Release();
+    }
+    if ( handle->buffer[1] ) {
+      LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
+      LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+      if ( buffer ) buffer->Release();
+      object->Release();
+    }
+    CloseHandle( handle->condition );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.state = STREAM_CLOSED;
+  return FAILURE;
+}
+
+void RtApiDs :: closeStream()
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiDs::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // Stop the callback thread.
+  stream_.callbackInfo.isRunning = false;
+  WaitForSingleObject( (HANDLE) stream_.callbackInfo.thread, INFINITE );
+  CloseHandle( (HANDLE) stream_.callbackInfo.thread );
+
+  DsHandle *handle = (DsHandle *) stream_.apiHandle;
+  if ( handle ) {
+    if ( handle->buffer[0] ) { // the object pointer can be NULL and valid
+      LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
+      LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+      if ( buffer ) {
+        buffer->Stop();
+        buffer->Release();
+      }
+      object->Release();
+    }
+    if ( handle->buffer[1] ) {
+      LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
+      LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+      if ( buffer ) {
+        buffer->Stop();
+        buffer->Release();
+      }
+      object->Release();
+    }
+    CloseHandle( handle->condition );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+void RtApiDs :: startStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiDs::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  DsHandle *handle = (DsHandle *) stream_.apiHandle;
+
+  // Increase scheduler frequency on lesser windows (a side-effect of
+  // increasing timer accuracy).  On greater windows (Win2K or later),
+  // this is already in effect.
+  timeBeginPeriod( 1 ); 
+
+  buffersRolling = false;
+  duplexPrerollBytes = 0;
+
+  if ( stream_.mode == DUPLEX ) {
+    // 0.5 seconds of silence in DUPLEX mode while the devices spin up and synchronize.
+    duplexPrerollBytes = (int) ( 0.5 * stream_.sampleRate * formatBytes( stream_.deviceFormat[1] ) * stream_.nDeviceChannels[1] );
+  }
+
+  HRESULT result = 0;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+    result = buffer->Play( 0, 0, DSBPLAY_LOOPING );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+    result = buffer->Start( DSCBSTART_LOOPING );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  handle->drainCounter = 0;
+  handle->internalDrain = false;
+  ResetEvent( handle->condition );
+  stream_.state = STREAM_RUNNING;
+
+ unlock:
+  if ( FAILED( result ) ) error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiDs :: stopStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiDs::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  HRESULT result = 0;
+  LPVOID audioPtr;
+  DWORD dataLen;
+  DsHandle *handle = (DsHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    if ( handle->drainCounter == 0 ) {
+      handle->drainCounter = 2;
+      WaitForSingleObject( handle->condition, INFINITE );  // block until signaled
+    }
+
+    stream_.state = STREAM_STOPPED;
+
+    MUTEX_LOCK( &stream_.mutex );
+
+    // Stop the buffer and clear memory
+    LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+    result = buffer->Stop();
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") stopping output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // Lock the buffer and clear it so that if we start to play again,
+    // we won't have old data playing.
+    result = buffer->Lock( 0, handle->dsBufferSize[0], &audioPtr, &dataLen, NULL, NULL, 0 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // Zero the DS buffer
+    ZeroMemory( audioPtr, dataLen );
+
+    // Unlock the DS buffer
+    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking output buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // If we start playing again, we must begin at beginning of buffer.
+    handle->bufferPointer[0] = 0;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+    LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+    audioPtr = NULL;
+    dataLen = 0;
+
+    stream_.state = STREAM_STOPPED;
+
+    if ( stream_.mode != DUPLEX )
+      MUTEX_LOCK( &stream_.mutex );
+
+    result = buffer->Stop();
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") stopping input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // Lock the buffer and clear it so that if we start to play again,
+    // we won't have old data playing.
+    result = buffer->Lock( 0, handle->dsBufferSize[1], &audioPtr, &dataLen, NULL, NULL, 0 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // Zero the DS buffer
+    ZeroMemory( audioPtr, dataLen );
+
+    // Unlock the DS buffer
+    result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking input buffer!";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+
+    // If we start recording again, we must begin at beginning of buffer.
+    handle->bufferPointer[1] = 0;
+  }
+
+ unlock:
+  timeEndPeriod( 1 ); // revert to normal scheduler frequency on lesser windows.
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  if ( FAILED( result ) ) error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiDs :: abortStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiDs::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  DsHandle *handle = (DsHandle *) stream_.apiHandle;
+  handle->drainCounter = 2;
+
+  stopStream();
+}
+
+void RtApiDs :: callbackEvent()
+{
+  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) {
+    Sleep( 50 ); // sleep 50 milliseconds
+    return;
+  }
+
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiDs::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
+  DsHandle *handle = (DsHandle *) stream_.apiHandle;
+
+  // Check if we were draining the stream and signal is finished.
+  if ( handle->drainCounter > stream_.nBuffers + 2 ) {
+
+    stream_.state = STREAM_STOPPING;
+    if ( handle->internalDrain == false )
+      SetEvent( handle->condition );
+    else
+      stopStream();
+    return;
+  }
+
+  // Invoke user callback to get fresh output data UNLESS we are
+  // draining stream.
+  if ( handle->drainCounter == 0 ) {
+    RtAudioCallback callback = (RtAudioCallback) info->callback;
+    double streamTime = getStreamTime();
+    RtAudioStreamStatus status = 0;
+    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
+      status |= RTAUDIO_OUTPUT_UNDERFLOW;
+      handle->xrun[0] = false;
+    }
+    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
+      status |= RTAUDIO_INPUT_OVERFLOW;
+      handle->xrun[1] = false;
+    }
+    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                                  stream_.bufferSize, streamTime, status, info->userData );
+    if ( cbReturnValue == 2 ) {
+      stream_.state = STREAM_STOPPING;
+      handle->drainCounter = 2;
+      abortStream();
+      return;
+    }
+    else if ( cbReturnValue == 1 ) {
+      handle->drainCounter = 1;
+      handle->internalDrain = true;
+    }
+  }
+
+  HRESULT result;
+  DWORD currentWritePointer, safeWritePointer;
+  DWORD currentReadPointer, safeReadPointer;
+  UINT nextWritePointer;
+
+  LPVOID buffer1 = NULL;
+  LPVOID buffer2 = NULL;
+  DWORD bufferSize1 = 0;
+  DWORD bufferSize2 = 0;
+
+  char *buffer;
+  long bufferBytes;
+
+  MUTEX_LOCK( &stream_.mutex );
+  if ( stream_.state == STREAM_STOPPED ) {
+    MUTEX_UNLOCK( &stream_.mutex );
+    return;
+  }
+
+  if ( buffersRolling == false ) {
+    if ( stream_.mode == DUPLEX ) {
+      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
+
+      // It takes a while for the devices to get rolling. As a result,
+      // there's no guarantee that the capture and write device pointers
+      // will move in lockstep.  Wait here for both devices to start
+      // rolling, and then set our buffer pointers accordingly.
+      // e.g. Crystal Drivers: the capture buffer starts up 5700 to 9600
+      // bytes later than the write buffer.
+
+      // Stub: a serious risk of having a pre-emptive scheduling round
+      // take place between the two GetCurrentPosition calls... but I'm
+      // really not sure how to solve the problem.  Temporarily boost to
+      // Realtime priority, maybe; but I'm not sure what priority the
+      // DirectSound service threads run at. We *should* be roughly
+      // within a ms or so of correct.
+
+      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+      LPDIRECTSOUNDCAPTUREBUFFER dsCaptureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+
+      DWORD startSafeWritePointer, startSafeReadPointer;
+
+      result = dsWriteBuffer->GetCurrentPosition( NULL, &startSafeWritePointer );
+      if ( FAILED( result ) ) {
+        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+        errorText_ = errorStream_.str();
+        MUTEX_UNLOCK( &stream_.mutex );
+        error( RtAudioError::SYSTEM_ERROR );
+        return;
+      }
+      result = dsCaptureBuffer->GetCurrentPosition( NULL, &startSafeReadPointer );
+      if ( FAILED( result ) ) {
+        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+        errorText_ = errorStream_.str();
+        MUTEX_UNLOCK( &stream_.mutex );
+        error( RtAudioError::SYSTEM_ERROR );
+        return;
+      }
+      while ( true ) {
+        result = dsWriteBuffer->GetCurrentPosition( NULL, &safeWritePointer );
+        if ( FAILED( result ) ) {
+          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+          errorText_ = errorStream_.str();
+          MUTEX_UNLOCK( &stream_.mutex );
+          error( RtAudioError::SYSTEM_ERROR );
+          return;
+        }
+        result = dsCaptureBuffer->GetCurrentPosition( NULL, &safeReadPointer );
+        if ( FAILED( result ) ) {
+          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+          errorText_ = errorStream_.str();
+          MUTEX_UNLOCK( &stream_.mutex );
+          error( RtAudioError::SYSTEM_ERROR );
+          return;
+        }
+        if ( safeWritePointer != startSafeWritePointer && safeReadPointer != startSafeReadPointer ) break;
+        Sleep( 1 );
+      }
+
+      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
+
+      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
+      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
+      handle->bufferPointer[1] = safeReadPointer;
+    }
+    else if ( stream_.mode == OUTPUT ) {
+
+      // Set the proper nextWritePosition after initial startup.
+      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+      result = dsWriteBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
+      if ( FAILED( result ) ) {
+        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+        errorText_ = errorStream_.str();
+        MUTEX_UNLOCK( &stream_.mutex );
+        error( RtAudioError::SYSTEM_ERROR );
+        return;
+      }
+      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
+      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
+    }
+
+    buffersRolling = true;
+  }
+
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    
+    LPDIRECTSOUNDBUFFER dsBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
+
+    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
+      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
+      bufferBytes *= formatBytes( stream_.userFormat );
+      memset( stream_.userBuffer[0], 0, bufferBytes );
+    }
+
+    // Setup parameters and do buffer conversion if necessary.
+    if ( stream_.doConvertBuffer[0] ) {
+      buffer = stream_.deviceBuffer;
+      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[0];
+      bufferBytes *= formatBytes( stream_.deviceFormat[0] );
+    }
+    else {
+      buffer = stream_.userBuffer[0];
+      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
+      bufferBytes *= formatBytes( stream_.userFormat );
+    }
+
+    // No byte swapping necessary in DirectSound implementation.
+
+    // Ahhh ... windoze.  16-bit data is signed but 8-bit data is
+    // unsigned.  So, we need to convert our signed 8-bit data here to
+    // unsigned.
+    if ( stream_.deviceFormat[0] == RTAUDIO_SINT8 )
+      for ( int i=0; i<bufferBytes; i++ ) buffer[i] = (unsigned char) ( buffer[i] + 128 );
+
+    DWORD dsBufferSize = handle->dsBufferSize[0];
+    nextWritePointer = handle->bufferPointer[0];
+
+    DWORD endWrite, leadPointer;
+    while ( true ) {
+      // Find out where the read and "safe write" pointers are.
+      result = dsBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
+      if ( FAILED( result ) ) {
+        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
+        errorText_ = errorStream_.str();
+        error( RtAudioError::SYSTEM_ERROR );
+        return;
+      }
+
+      // We will copy our output buffer into the region between
+      // safeWritePointer and leadPointer.  If leadPointer is not
+      // beyond the next endWrite position, wait until it is.
+      leadPointer = safeWritePointer + handle->dsPointerLeadTime[0];
+      //std::cout << "safeWritePointer = " << safeWritePointer << ", leadPointer = " << leadPointer << ", nextWritePointer = " << nextWritePointer << std::endl;
+      if ( leadPointer > dsBufferSize ) leadPointer -= dsBufferSize;
+      if ( leadPointer < nextWritePointer ) leadPointer += dsBufferSize; // unwrap offset
+      endWrite = nextWritePointer + bufferBytes;
+
+      // Check whether the entire write region is behind the play pointer.
+      if ( leadPointer >= endWrite ) break;
+
+      // If we are here, then we must wait until the leadPointer advances
+      // beyond the end of our next write region. We use the
+      // Sleep() function to suspend operation until that happens.
+      double millis = ( endWrite - leadPointer ) * 1000.0;
+      millis /= ( formatBytes( stream_.deviceFormat[0]) * stream_.nDeviceChannels[0] * stream_.sampleRate);
+      if ( millis < 1.0 ) millis = 1.0;
+      Sleep( (DWORD) millis );
+    }
+
+    if ( dsPointerBetween( nextWritePointer, safeWritePointer, currentWritePointer, dsBufferSize )
+         || dsPointerBetween( endWrite, safeWritePointer, currentWritePointer, dsBufferSize ) ) { 
+      // We've strayed into the forbidden zone ... resync the read pointer.
+      handle->xrun[0] = true;
+      nextWritePointer = safeWritePointer + handle->dsPointerLeadTime[0] - bufferBytes;
+      if ( nextWritePointer >= dsBufferSize ) nextWritePointer -= dsBufferSize;
+      handle->bufferPointer[0] = nextWritePointer;
+      endWrite = nextWritePointer + bufferBytes;
+    }
+
+    // Lock free space in the buffer
+    result = dsBuffer->Lock( nextWritePointer, bufferBytes, &buffer1,
+                             &bufferSize1, &buffer2, &bufferSize2, 0 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking buffer during playback!";
+      errorText_ = errorStream_.str();
+      MUTEX_UNLOCK( &stream_.mutex );
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+
+    // Copy our buffer into the DS buffer
+    CopyMemory( buffer1, buffer, bufferSize1 );
+    if ( buffer2 != NULL ) CopyMemory( buffer2, buffer+bufferSize1, bufferSize2 );
+
+    // Update our buffer offset and unlock sound buffer
+    dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking buffer during playback!";
+      errorText_ = errorStream_.str();
+      MUTEX_UNLOCK( &stream_.mutex );
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+    nextWritePointer = ( nextWritePointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
+    handle->bufferPointer[0] = nextWritePointer;
+  }
+
+  // Don't bother draining input
+  if ( handle->drainCounter ) {
+    handle->drainCounter++;
+    goto unlock;
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    // Setup parameters.
+    if ( stream_.doConvertBuffer[1] ) {
+      buffer = stream_.deviceBuffer;
+      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[1];
+      bufferBytes *= formatBytes( stream_.deviceFormat[1] );
+    }
+    else {
+      buffer = stream_.userBuffer[1];
+      bufferBytes = stream_.bufferSize * stream_.nUserChannels[1];
+      bufferBytes *= formatBytes( stream_.userFormat );
+    }
+
+    LPDIRECTSOUNDCAPTUREBUFFER dsBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
+    long nextReadPointer = handle->bufferPointer[1];
+    DWORD dsBufferSize = handle->dsBufferSize[1];
+
+    // Find out where the write and "safe read" pointers are.
+    result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+      errorText_ = errorStream_.str();
+      MUTEX_UNLOCK( &stream_.mutex );
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+
+    if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
+    DWORD endRead = nextReadPointer + bufferBytes;
+
+    // Handling depends on whether we are INPUT or DUPLEX. 
+    // If we're in INPUT mode then waiting is a good thing. If we're in DUPLEX mode,
+    // then a wait here will drag the write pointers into the forbidden zone.
+    // 
+    // In DUPLEX mode, rather than wait, we will back off the read pointer until 
+    // it's in a safe position. This causes dropouts, but it seems to be the only 
+    // practical way to sync up the read and write pointers reliably, given the 
+    // the very complex relationship between phase and increment of the read and write 
+    // pointers.
+    //
+    // In order to minimize audible dropouts in DUPLEX mode, we will
+    // provide a pre-roll period of 0.5 seconds in which we return
+    // zeros from the read buffer while the pointers sync up.
+
+    if ( stream_.mode == DUPLEX ) {
+      if ( safeReadPointer < endRead ) {
+        if ( duplexPrerollBytes <= 0 ) {
+          // Pre-roll time over. Be more agressive.
+          int adjustment = endRead-safeReadPointer;
+
+          handle->xrun[1] = true;
+          // Two cases:
+          //   - large adjustments: we've probably run out of CPU cycles, so just resync exactly,
+          //     and perform fine adjustments later.
+          //   - small adjustments: back off by twice as much.
+          if ( adjustment >= 2*bufferBytes )
+            nextReadPointer = safeReadPointer-2*bufferBytes;
+          else
+            nextReadPointer = safeReadPointer-bufferBytes-adjustment;
+
+          if ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
+
+        }
+        else {
+          // In pre=roll time. Just do it.
+          nextReadPointer = safeReadPointer - bufferBytes;
+          while ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
+        }
+        endRead = nextReadPointer + bufferBytes;
+      }
+    }
+    else { // mode == INPUT
+      while ( safeReadPointer < endRead && stream_.callbackInfo.isRunning ) {
+        // See comments for playback.
+        double millis = (endRead - safeReadPointer) * 1000.0;
+        millis /= ( formatBytes(stream_.deviceFormat[1]) * stream_.nDeviceChannels[1] * stream_.sampleRate);
+        if ( millis < 1.0 ) millis = 1.0;
+        Sleep( (DWORD) millis );
+
+        // Wake up and find out where we are now.
+        result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
+        if ( FAILED( result ) ) {
+          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
+          errorText_ = errorStream_.str();
+          MUTEX_UNLOCK( &stream_.mutex );
+          error( RtAudioError::SYSTEM_ERROR );
+          return;
+        }
+      
+        if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
+      }
+    }
+
+    // Lock free space in the buffer
+    result = dsBuffer->Lock( nextReadPointer, bufferBytes, &buffer1,
+                             &bufferSize1, &buffer2, &bufferSize2, 0 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking capture buffer!";
+      errorText_ = errorStream_.str();
+      MUTEX_UNLOCK( &stream_.mutex );
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+
+    if ( duplexPrerollBytes <= 0 ) {
+      // Copy our buffer into the DS buffer
+      CopyMemory( buffer, buffer1, bufferSize1 );
+      if ( buffer2 != NULL ) CopyMemory( buffer+bufferSize1, buffer2, bufferSize2 );
+    }
+    else {
+      memset( buffer, 0, bufferSize1 );
+      if ( buffer2 != NULL ) memset( buffer + bufferSize1, 0, bufferSize2 );
+      duplexPrerollBytes -= bufferSize1 + bufferSize2;
+    }
+
+    // Update our buffer offset and unlock sound buffer
+    nextReadPointer = ( nextReadPointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
+    dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
+    if ( FAILED( result ) ) {
+      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking capture buffer!";
+      errorText_ = errorStream_.str();
+      MUTEX_UNLOCK( &stream_.mutex );
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+    handle->bufferPointer[1] = nextReadPointer;
+
+    // No byte swapping necessary in DirectSound implementation.
+
+    // If necessary, convert 8-bit data from unsigned to signed.
+    if ( stream_.deviceFormat[1] == RTAUDIO_SINT8 )
+      for ( int j=0; j<bufferBytes; j++ ) buffer[j] = (signed char) ( buffer[j] - 128 );
+
+    // Do buffer conversion if necessary.
+    if ( stream_.doConvertBuffer[1] )
+      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+  }
+
+ unlock:
+  MUTEX_UNLOCK( &stream_.mutex );
+  RtApi::tickStreamTime();
+}
+
+// Definitions for utility functions and callbacks
+// specific to the DirectSound implementation.
+
+static unsigned __stdcall callbackHandler( void *ptr )
+{
+  CallbackInfo *info = (CallbackInfo *) ptr;
+  RtApiDs *object = (RtApiDs *) info->object;
+  bool* isRunning = &info->isRunning;
+
+  while ( *isRunning == true ) {
+    object->callbackEvent();
+  }
+
+  _endthreadex( 0 );
+  return 0;
+}
+
+static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
+                                          LPCTSTR description,
+                                          LPCTSTR /*module*/,
+                                          LPVOID lpContext )
+{
+  struct DsProbeData& probeInfo = *(struct DsProbeData*) lpContext;
+  std::vector<struct DsDevice>& dsDevices = *probeInfo.dsDevices;
+
+  HRESULT hr;
+  bool validDevice = false;
+  if ( probeInfo.isInput == true ) {
+    DSCCAPS caps;
+    LPDIRECTSOUNDCAPTURE object;
+
+    hr = DirectSoundCaptureCreate(  lpguid, &object,   NULL );
+    if ( hr != DS_OK ) return TRUE;
+
+    caps.dwSize = sizeof(caps);
+    hr = object->GetCaps( &caps );
+    if ( hr == DS_OK ) {
+      if ( caps.dwChannels > 0 && caps.dwFormats > 0 )
+        validDevice = true;
+    }
+    object->Release();
+  }
+  else {
+    DSCAPS caps;
+    LPDIRECTSOUND object;
+    hr = DirectSoundCreate(  lpguid, &object,   NULL );
+    if ( hr != DS_OK ) return TRUE;
+
+    caps.dwSize = sizeof(caps);
+    hr = object->GetCaps( &caps );
+    if ( hr == DS_OK ) {
+      if ( caps.dwFlags & DSCAPS_PRIMARYMONO || caps.dwFlags & DSCAPS_PRIMARYSTEREO )
+        validDevice = true;
+    }
+    object->Release();
+  }
+
+  // If good device, then save its name and guid.
+  std::string name = convertCharPointerToStdString( description );
+  //if ( name == "Primary Sound Driver" || name == "Primary Sound Capture Driver" )
+  if ( lpguid == NULL )
+    name = "Default Device";
+  if ( validDevice ) {
+    for ( unsigned int i=0; i<dsDevices.size(); i++ ) {
+      if ( dsDevices[i].name == name ) {
+        dsDevices[i].found = true;
+        if ( probeInfo.isInput ) {
+          dsDevices[i].id[1] = lpguid;
+          dsDevices[i].validId[1] = true;
+        }
+        else {
+          dsDevices[i].id[0] = lpguid;
+          dsDevices[i].validId[0] = true;
+        }
+        return TRUE;
+      }
+    }
+
+    DsDevice device;
+    device.name = name;
+    device.found = true;
+    if ( probeInfo.isInput ) {
+      device.id[1] = lpguid;
+      device.validId[1] = true;
+    }
+    else {
+      device.id[0] = lpguid;
+      device.validId[0] = true;
+    }
+    dsDevices.push_back( device );
+  }
+
+  return TRUE;
+}
+
+static const char* getErrorString( int code )
+{
+  switch ( code ) {
+
+  case DSERR_ALLOCATED:
+    return "Already allocated";
+
+  case DSERR_CONTROLUNAVAIL:
+    return "Control unavailable";
+
+  case DSERR_INVALIDPARAM:
+    return "Invalid parameter";
+
+  case DSERR_INVALIDCALL:
+    return "Invalid call";
+
+  case DSERR_GENERIC:
+    return "Generic error";
+
+  case DSERR_PRIOLEVELNEEDED:
+    return "Priority level needed";
+
+  case DSERR_OUTOFMEMORY:
+    return "Out of memory";
+
+  case DSERR_BADFORMAT:
+    return "The sample rate or the channel format is not supported";
+
+  case DSERR_UNSUPPORTED:
+    return "Not supported";
+
+  case DSERR_NODRIVER:
+    return "No driver";
+
+  case DSERR_ALREADYINITIALIZED:
+    return "Already initialized";
+
+  case DSERR_NOAGGREGATION:
+    return "No aggregation";
+
+  case DSERR_BUFFERLOST:
+    return "Buffer lost";
+
+  case DSERR_OTHERAPPHASPRIO:
+    return "Another application already has priority";
+
+  case DSERR_UNINITIALIZED:
+    return "Uninitialized";
+
+  default:
+    return "DirectSound unknown error";
+  }
+}
+//******************** End of __WINDOWS_DS__ *********************//
+#endif
+
+
+#if defined(__LINUX_ALSA__)
+
+#include <alsa/asoundlib.h>
+#include <unistd.h>
+
+  // A structure to hold various information related to the ALSA API
+  // implementation.
+struct AlsaHandle {
+  snd_pcm_t *handles[2];
+  bool synchronized;
+  bool xrun[2];
+  pthread_cond_t runnable_cv;
+  bool runnable;
+
+  AlsaHandle()
+    :synchronized(false), runnable(false) { xrun[0] = false; xrun[1] = false; }
+};
+
+static void *alsaCallbackHandler( void * ptr );
+
+RtApiAlsa :: RtApiAlsa()
+{
+  // Nothing to do here.
+}
+
+RtApiAlsa :: ~RtApiAlsa()
+{
+  if ( stream_.state != STREAM_CLOSED ) closeStream();
+}
+
+unsigned int RtApiAlsa :: getDeviceCount( void )
+{
+  unsigned nDevices = 0;
+  int result, subdevice, card;
+  char name[64];
+  snd_ctl_t *handle;
+
+  // Count cards and devices
+  card = -1;
+  snd_card_next( &card );
+  while ( card >= 0 ) {
+    sprintf( name, "hw:%d", card );
+    result = snd_ctl_open( &handle, name, 0 );
+    if ( result < 0 ) {
+      errorStream_ << "RtApiAlsa::getDeviceCount: control open, card = " << card << ", " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+      goto nextcard;
+    }
+    subdevice = -1;
+    while( 1 ) {
+      result = snd_ctl_pcm_next_device( handle, &subdevice );
+      if ( result < 0 ) {
+        errorStream_ << "RtApiAlsa::getDeviceCount: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+        error( RtAudioError::WARNING );
+        break;
+      }
+      if ( subdevice < 0 )
+        break;
+      nDevices++;
+    }
+  nextcard:
+    snd_ctl_close( handle );
+    snd_card_next( &card );
+  }
+
+  result = snd_ctl_open( &handle, "default", 0 );
+  if (result == 0) {
+    nDevices++;
+    snd_ctl_close( handle );
+  }
+
+  return nDevices;
+}
+
+RtAudio::DeviceInfo RtApiAlsa :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  unsigned nDevices = 0;
+  int result, subdevice, card;
+  char name[64];
+  snd_ctl_t *chandle;
+
+  // Count cards and devices
+  card = -1;
+  subdevice = -1;
+  snd_card_next( &card );
+  while ( card >= 0 ) {
+    sprintf( name, "hw:%d", card );
+    result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
+    if ( result < 0 ) {
+      errorStream_ << "RtApiAlsa::getDeviceInfo: control open, card = " << card << ", " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+      goto nextcard;
+    }
+    subdevice = -1;
+    while( 1 ) {
+      result = snd_ctl_pcm_next_device( chandle, &subdevice );
+      if ( result < 0 ) {
+        errorStream_ << "RtApiAlsa::getDeviceInfo: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+        error( RtAudioError::WARNING );
+        break;
+      }
+      if ( subdevice < 0 ) break;
+      if ( nDevices == device ) {
+        sprintf( name, "hw:%d,%d", card, subdevice );
+        goto foundDevice;
+      }
+      nDevices++;
+    }
+  nextcard:
+    snd_ctl_close( chandle );
+    snd_card_next( &card );
+  }
+
+  result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
+  if ( result == 0 ) {
+    if ( nDevices == device ) {
+      strcpy( name, "default" );
+      goto foundDevice;
+    }
+    nDevices++;
+  }
+
+  if ( nDevices == 0 ) {
+    errorText_ = "RtApiAlsa::getDeviceInfo: no devices found!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  if ( device >= nDevices ) {
+    errorText_ = "RtApiAlsa::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+ foundDevice:
+
+  // If a stream is already open, we cannot probe the stream devices.
+  // Thus, use the saved results.
+  if ( stream_.state != STREAM_CLOSED &&
+       ( stream_.device[0] == device || stream_.device[1] == device ) ) {
+    snd_ctl_close( chandle );
+    if ( device >= devices_.size() ) {
+      errorText_ = "RtApiAlsa::getDeviceInfo: device ID was not present before stream was opened.";
+      error( RtAudioError::WARNING );
+      return info;
+    }
+    return devices_[ device ];
+  }
+
+  int openMode = SND_PCM_ASYNC;
+  snd_pcm_stream_t stream;
+  snd_pcm_info_t *pcminfo;
+  snd_pcm_info_alloca( &pcminfo );
+  snd_pcm_t *phandle;
+  snd_pcm_hw_params_t *params;
+  snd_pcm_hw_params_alloca( &params );
+
+  // First try for playback unless default device (which has subdev -1)
+  stream = SND_PCM_STREAM_PLAYBACK;
+  snd_pcm_info_set_stream( pcminfo, stream );
+  if ( subdevice != -1 ) {
+    snd_pcm_info_set_device( pcminfo, subdevice );
+    snd_pcm_info_set_subdevice( pcminfo, 0 );
+
+    result = snd_ctl_pcm_info( chandle, pcminfo );
+    if ( result < 0 ) {
+      // Device probably doesn't support playback.
+      goto captureProbe;
+    }
+  }
+
+  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK );
+  if ( result < 0 ) {
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto captureProbe;
+  }
+
+  // The device is open ... fill the parameter structure.
+  result = snd_pcm_hw_params_any( phandle, params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto captureProbe;
+  }
+
+  // Get output channel information.
+  unsigned int value;
+  result = snd_pcm_hw_params_get_channels_max( params, &value );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") output channels, " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    goto captureProbe;
+  }
+  info.outputChannels = value;
+  snd_pcm_close( phandle );
+
+ captureProbe:
+  stream = SND_PCM_STREAM_CAPTURE;
+  snd_pcm_info_set_stream( pcminfo, stream );
+
+  // Now try for capture unless default device (with subdev = -1)
+  if ( subdevice != -1 ) {
+    result = snd_ctl_pcm_info( chandle, pcminfo );
+    snd_ctl_close( chandle );
+    if ( result < 0 ) {
+      // Device probably doesn't support capture.
+      if ( info.outputChannels == 0 ) return info;
+      goto probeParameters;
+    }
+  }
+  else
+    snd_ctl_close( chandle );
+
+  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
+  if ( result < 0 ) {
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    if ( info.outputChannels == 0 ) return info;
+    goto probeParameters;
+  }
+
+  // The device is open ... fill the parameter structure.
+  result = snd_pcm_hw_params_any( phandle, params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    if ( info.outputChannels == 0 ) return info;
+    goto probeParameters;
+  }
+
+  result = snd_pcm_hw_params_get_channels_max( params, &value );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") input channels, " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    if ( info.outputChannels == 0 ) return info;
+    goto probeParameters;
+  }
+  info.inputChannels = value;
+  snd_pcm_close( phandle );
+
+  // If device opens for both playback and capture, we determine the channels.
+  if ( info.outputChannels > 0 && info.inputChannels > 0 )
+    info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+
+  // ALSA doesn't provide default devices so we'll use the first available one.
+  if ( device == 0 && info.outputChannels > 0 )
+    info.isDefaultOutput = true;
+  if ( device == 0 && info.inputChannels > 0 )
+    info.isDefaultInput = true;
+
+ probeParameters:
+  // At this point, we just need to figure out the supported data
+  // formats and sample rates.  We'll proceed by opening the device in
+  // the direction with the maximum number of channels, or playback if
+  // they are equal.  This might limit our sample rate options, but so
+  // be it.
+
+  if ( info.outputChannels >= info.inputChannels )
+    stream = SND_PCM_STREAM_PLAYBACK;
+  else
+    stream = SND_PCM_STREAM_CAPTURE;
+  snd_pcm_info_set_stream( pcminfo, stream );
+
+  result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
+  if ( result < 0 ) {
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // The device is open ... fill the parameter structure.
+  result = snd_pcm_hw_params_any( phandle, params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Test our discrete set of sample rate values.
+  info.sampleRates.clear();
+  for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
+    if ( snd_pcm_hw_params_test_rate( phandle, params, SAMPLE_RATES[i], 0 ) == 0 ) {
+      info.sampleRates.push_back( SAMPLE_RATES[i] );
+
+      if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
+        info.preferredSampleRate = SAMPLE_RATES[i];
+    }
+  }
+  if ( info.sampleRates.size() == 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: no supported sample rates found for device (" << name << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Probe the supported data formats ... we don't care about endian-ness just yet
+  snd_pcm_format_t format;
+  info.nativeFormats = 0;
+  format = SND_PCM_FORMAT_S8;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT8;
+  format = SND_PCM_FORMAT_S16;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT16;
+  format = SND_PCM_FORMAT_S24;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT24;
+  format = SND_PCM_FORMAT_S32;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_SINT32;
+  format = SND_PCM_FORMAT_FLOAT;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_FLOAT32;
+  format = SND_PCM_FORMAT_FLOAT64;
+  if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
+    info.nativeFormats |= RTAUDIO_FLOAT64;
+
+  // Check that we have at least one supported format
+  if ( info.nativeFormats == 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::getDeviceInfo: pcm device (" << name << ") data format not supported by RtAudio.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Get the device name
+  char *cardname;
+  result = snd_card_get_name( card, &cardname );
+  if ( result >= 0 ) {
+    sprintf( name, "hw:%s,%d", cardname, subdevice );
+    free( cardname );
+  }
+  info.name = name;
+
+  // That's all ... close the device and return
+  snd_pcm_close( phandle );
+  info.probed = true;
+  return info;
+}
+
+void RtApiAlsa :: saveDeviceInfo( void )
+{
+  devices_.clear();
+
+  unsigned int nDevices = getDeviceCount();
+  devices_.resize( nDevices );
+  for ( unsigned int i=0; i<nDevices; i++ )
+    devices_[i] = getDeviceInfo( i );
+}
+
+bool RtApiAlsa :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                   unsigned int firstChannel, unsigned int sampleRate,
+                                   RtAudioFormat format, unsigned int *bufferSize,
+                                   RtAudio::StreamOptions *options )
+
+{
+#if defined(__RTAUDIO_DEBUG__)
+  snd_output_t *out;
+  snd_output_stdio_attach(&out, stderr, 0);
+#endif
+
+  // I'm not using the "plug" interface ... too much inconsistent behavior.
+
+  unsigned nDevices = 0;
+  int result, subdevice, card;
+  char name[64];
+  snd_ctl_t *chandle;
+
+  if ( options && options->flags & RTAUDIO_ALSA_USE_DEFAULT )
+    snprintf(name, sizeof(name), "%s", "default");
+  else {
+    // Count cards and devices
+    card = -1;
+    snd_card_next( &card );
+    while ( card >= 0 ) {
+      sprintf( name, "hw:%d", card );
+      result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
+      if ( result < 0 ) {
+        errorStream_ << "RtApiAlsa::probeDeviceOpen: control open, card = " << card << ", " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+        return FAILURE;
+      }
+      subdevice = -1;
+      while( 1 ) {
+        result = snd_ctl_pcm_next_device( chandle, &subdevice );
+        if ( result < 0 ) break;
+        if ( subdevice < 0 ) break;
+        if ( nDevices == device ) {
+          sprintf( name, "hw:%d,%d", card, subdevice );
+          snd_ctl_close( chandle );
+          goto foundDevice;
+        }
+        nDevices++;
+      }
+      snd_ctl_close( chandle );
+      snd_card_next( &card );
+    }
+
+    result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
+    if ( result == 0 ) {
+      if ( nDevices == device ) {
+        strcpy( name, "default" );
+        goto foundDevice;
+      }
+      nDevices++;
+    }
+
+    if ( nDevices == 0 ) {
+      // This should not happen because a check is made before this function is called.
+      errorText_ = "RtApiAlsa::probeDeviceOpen: no devices found!";
+      return FAILURE;
+    }
+
+    if ( device >= nDevices ) {
+      // This should not happen because a check is made before this function is called.
+      errorText_ = "RtApiAlsa::probeDeviceOpen: device ID is invalid!";
+      return FAILURE;
+    }
+  }
+
+ foundDevice:
+
+  // The getDeviceInfo() function will not work for a device that is
+  // already open.  Thus, we'll probe the system before opening a
+  // stream and save the results for use by getDeviceInfo().
+  if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) // only do once
+    this->saveDeviceInfo();
+
+  snd_pcm_stream_t stream;
+  if ( mode == OUTPUT )
+    stream = SND_PCM_STREAM_PLAYBACK;
+  else
+    stream = SND_PCM_STREAM_CAPTURE;
+
+  snd_pcm_t *phandle;
+  int openMode = SND_PCM_ASYNC;
+  result = snd_pcm_open( &phandle, name, stream, openMode );
+  if ( result < 0 ) {
+    if ( mode == OUTPUT )
+      errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for output.";
+    else
+      errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for input.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Fill the parameter structure.
+  snd_pcm_hw_params_t *hw_params;
+  snd_pcm_hw_params_alloca( &hw_params );
+  result = snd_pcm_hw_params_any( phandle, hw_params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") parameters, " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+#if defined(__RTAUDIO_DEBUG__)
+  fprintf( stderr, "\nRtApiAlsa: dump hardware params just after device open:\n\n" );
+  snd_pcm_hw_params_dump( hw_params, out );
+#endif
+
+  // Set access ... check user preference.
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) {
+    stream_.userInterleaved = false;
+    result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
+    if ( result < 0 ) {
+      result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
+      stream_.deviceInterleaved[mode] =  true;
+    }
+    else
+      stream_.deviceInterleaved[mode] = false;
+  }
+  else {
+    stream_.userInterleaved = true;
+    result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
+    if ( result < 0 ) {
+      result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
+      stream_.deviceInterleaved[mode] =  false;
+    }
+    else
+      stream_.deviceInterleaved[mode] =  true;
+  }
+
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") access, " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Determine how to set the device format.
+  stream_.userFormat = format;
+  snd_pcm_format_t deviceFormat = SND_PCM_FORMAT_UNKNOWN;
+
+  if ( format == RTAUDIO_SINT8 )
+    deviceFormat = SND_PCM_FORMAT_S8;
+  else if ( format == RTAUDIO_SINT16 )
+    deviceFormat = SND_PCM_FORMAT_S16;
+  else if ( format == RTAUDIO_SINT24 )
+    deviceFormat = SND_PCM_FORMAT_S24;
+  else if ( format == RTAUDIO_SINT32 )
+    deviceFormat = SND_PCM_FORMAT_S32;
+  else if ( format == RTAUDIO_FLOAT32 )
+    deviceFormat = SND_PCM_FORMAT_FLOAT;
+  else if ( format == RTAUDIO_FLOAT64 )
+    deviceFormat = SND_PCM_FORMAT_FLOAT64;
+
+  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat) == 0) {
+    stream_.deviceFormat[mode] = format;
+    goto setFormat;
+  }
+
+  // The user requested format is not natively supported by the device.
+  deviceFormat = SND_PCM_FORMAT_FLOAT64;
+  if ( snd_pcm_hw_params_test_format( phandle, hw_params, deviceFormat ) == 0 ) {
+    stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
+    goto setFormat;
+  }
+
+  deviceFormat = SND_PCM_FORMAT_FLOAT;
+  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+    goto setFormat;
+  }
+
+  deviceFormat = SND_PCM_FORMAT_S32;
+  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+    goto setFormat;
+  }
+
+  deviceFormat = SND_PCM_FORMAT_S24;
+  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+    goto setFormat;
+  }
+
+  deviceFormat = SND_PCM_FORMAT_S16;
+  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+    goto setFormat;
+  }
+
+  deviceFormat = SND_PCM_FORMAT_S8;
+  if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
+    stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+    goto setFormat;
+  }
+
+  // If we get here, no supported format was found.
+  snd_pcm_close( phandle );
+  errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device " << device << " data format not supported by RtAudio.";
+  errorText_ = errorStream_.str();
+  return FAILURE;
+
+ setFormat:
+  result = snd_pcm_hw_params_set_format( phandle, hw_params, deviceFormat );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") data format, " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Determine whether byte-swaping is necessary.
+  stream_.doByteSwap[mode] = false;
+  if ( deviceFormat != SND_PCM_FORMAT_S8 ) {
+    result = snd_pcm_format_cpu_endian( deviceFormat );
+    if ( result == 0 )
+      stream_.doByteSwap[mode] = true;
+    else if (result < 0) {
+      snd_pcm_close( phandle );
+      errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") endian-ness, " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      return FAILURE;
+    }
+  }
+
+  // Set the sample rate.
+  result = snd_pcm_hw_params_set_rate_near( phandle, hw_params, (unsigned int*) &sampleRate, 0 );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting sample rate on device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Determine the number of channels for this device.  We support a possible
+  // minimum device channel number > than the value requested by the user.
+  stream_.nUserChannels[mode] = channels;
+  unsigned int value;
+  result = snd_pcm_hw_params_get_channels_max( hw_params, &value );
+  unsigned int deviceChannels = value;
+  if ( result < 0 || deviceChannels < channels + firstChannel ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: requested channel parameters not supported by device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  result = snd_pcm_hw_params_get_channels_min( hw_params, &value );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting minimum channels for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+  deviceChannels = value;
+  if ( deviceChannels < channels + firstChannel ) deviceChannels = channels + firstChannel;
+  stream_.nDeviceChannels[mode] = deviceChannels;
+
+  // Set the device channels.
+  result = snd_pcm_hw_params_set_channels( phandle, hw_params, deviceChannels );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting channels for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Set the buffer (or period) size.
+  int dir = 0;
+  snd_pcm_uframes_t periodSize = *bufferSize;
+  result = snd_pcm_hw_params_set_period_size_near( phandle, hw_params, &periodSize, &dir );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting period size for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+  *bufferSize = periodSize;
+
+  // Set the buffer number, which in ALSA is referred to as the "period".
+  unsigned int periods = 0;
+  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) periods = 2;
+  if ( options && options->numberOfBuffers > 0 ) periods = options->numberOfBuffers;
+  if ( periods < 2 ) periods = 4; // a fairly safe default value
+  result = snd_pcm_hw_params_set_periods_near( phandle, hw_params, &periods, &dir );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting periods for device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // If attempting to setup a duplex stream, the bufferSize parameter
+  // MUST be the same in both directions!
+  if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << name << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  stream_.bufferSize = *bufferSize;
+
+  // Install the hardware configuration
+  result = snd_pcm_hw_params( phandle, hw_params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing hardware configuration on device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+#if defined(__RTAUDIO_DEBUG__)
+  fprintf(stderr, "\nRtApiAlsa: dump hardware params after installation:\n\n");
+  snd_pcm_hw_params_dump( hw_params, out );
+#endif
+
+  // Set the software configuration to fill buffers with zeros and prevent device stopping on xruns.
+  snd_pcm_sw_params_t *sw_params = NULL;
+  snd_pcm_sw_params_alloca( &sw_params );
+  snd_pcm_sw_params_current( phandle, sw_params );
+  snd_pcm_sw_params_set_start_threshold( phandle, sw_params, *bufferSize );
+  snd_pcm_sw_params_set_stop_threshold( phandle, sw_params, ULONG_MAX );
+  snd_pcm_sw_params_set_silence_threshold( phandle, sw_params, 0 );
+
+  // The following two settings were suggested by Theo Veenker
+  //snd_pcm_sw_params_set_avail_min( phandle, sw_params, *bufferSize );
+  //snd_pcm_sw_params_set_xfer_align( phandle, sw_params, 1 );
+
+  // here are two options for a fix
+  //snd_pcm_sw_params_set_silence_size( phandle, sw_params, ULONG_MAX );
+  snd_pcm_uframes_t val;
+  snd_pcm_sw_params_get_boundary( sw_params, &val );
+  snd_pcm_sw_params_set_silence_size( phandle, sw_params, val );
+
+  result = snd_pcm_sw_params( phandle, sw_params );
+  if ( result < 0 ) {
+    snd_pcm_close( phandle );
+    errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing software configuration on device (" << name << "), " << snd_strerror( result ) << ".";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+#if defined(__RTAUDIO_DEBUG__)
+  fprintf(stderr, "\nRtApiAlsa: dump software params after installation:\n\n");
+  snd_pcm_sw_params_dump( sw_params, out );
+#endif
+
+  // Set flags for buffer conversion
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+       stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate the ApiHandle if necessary and then save.
+  AlsaHandle *apiInfo = 0;
+  if ( stream_.apiHandle == 0 ) {
+    try {
+      apiInfo = (AlsaHandle *) new AlsaHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating AlsaHandle memory.";
+      goto error;
+    }
+
+    if ( pthread_cond_init( &apiInfo->runnable_cv, NULL ) ) {
+      errorText_ = "RtApiAlsa::probeDeviceOpen: error initializing pthread condition variable.";
+      goto error;
+    }
+
+    stream_.apiHandle = (void *) apiInfo;
+    apiInfo->handles[0] = 0;
+    apiInfo->handles[1] = 0;
+  }
+  else {
+    apiInfo = (AlsaHandle *) stream_.apiHandle;
+  }
+  apiInfo->handles[mode] = phandle;
+  phandle = 0;
+
+  // Allocate necessary internal buffers.
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( mode == INPUT ) {
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+        if ( bufferBytes <= bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  stream_.sampleRate = sampleRate;
+  stream_.nBuffers = periods;
+  stream_.device[mode] = device;
+  stream_.state = STREAM_STOPPED;
+
+  // Setup the buffer conversion information structure.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+  // Setup thread if necessary.
+  if ( stream_.mode == OUTPUT && mode == INPUT ) {
+    // We had already set up an output stream.
+    stream_.mode = DUPLEX;
+    // Link the streams if possible.
+    apiInfo->synchronized = false;
+    if ( snd_pcm_link( apiInfo->handles[0], apiInfo->handles[1] ) == 0 )
+      apiInfo->synchronized = true;
+    else {
+      errorText_ = "RtApiAlsa::probeDeviceOpen: unable to synchronize input and output devices.";
+      error( RtAudioError::WARNING );
+    }
+  }
+  else {
+    stream_.mode = mode;
+
+    // Setup callback thread.
+    stream_.callbackInfo.object = (void *) this;
+
+    // Set the thread attributes for joinable and realtime scheduling
+    // priority (optional).  The higher priority will only take affect
+    // if the program is run as root or suid. Note, under Linux
+    // processes with CAP_SYS_NICE privilege, a user can change
+    // scheduling policy and priority (thus need not be root). See
+    // POSIX "capabilities".
+    pthread_attr_t attr;
+    pthread_attr_init( &attr );
+    pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
+
+#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
+    if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
+      // We previously attempted to increase the audio callback priority
+      // to SCHED_RR here via the attributes.  However, while no errors
+      // were reported in doing so, it did not work.  So, now this is
+      // done in the alsaCallbackHandler function.
+      stream_.callbackInfo.doRealtime = true;
+      int priority = options->priority;
+      int min = sched_get_priority_min( SCHED_RR );
+      int max = sched_get_priority_max( SCHED_RR );
+      if ( priority < min ) priority = min;
+      else if ( priority > max ) priority = max;
+      stream_.callbackInfo.priority = priority;
+    }
+#endif
+
+    stream_.callbackInfo.isRunning = true;
+    result = pthread_create( &stream_.callbackInfo.thread, &attr, alsaCallbackHandler, &stream_.callbackInfo );
+    pthread_attr_destroy( &attr );
+    if ( result ) {
+      stream_.callbackInfo.isRunning = false;
+      errorText_ = "RtApiAlsa::error creating callback thread!";
+      goto error;
+    }
+  }
+
+  return SUCCESS;
+
+ error:
+  if ( apiInfo ) {
+    pthread_cond_destroy( &apiInfo->runnable_cv );
+    if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
+    if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
+    delete apiInfo;
+    stream_.apiHandle = 0;
+  }
+
+  if ( phandle) snd_pcm_close( phandle );
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.state = STREAM_CLOSED;
+  return FAILURE;
+}
+
+void RtApiAlsa :: closeStream()
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiAlsa::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+  stream_.callbackInfo.isRunning = false;
+  MUTEX_LOCK( &stream_.mutex );
+  if ( stream_.state == STREAM_STOPPED ) {
+    apiInfo->runnable = true;
+    pthread_cond_signal( &apiInfo->runnable_cv );
+  }
+  MUTEX_UNLOCK( &stream_.mutex );
+  pthread_join( stream_.callbackInfo.thread, NULL );
+
+  if ( stream_.state == STREAM_RUNNING ) {
+    stream_.state = STREAM_STOPPED;
+    if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
+      snd_pcm_drop( apiInfo->handles[0] );
+    if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
+      snd_pcm_drop( apiInfo->handles[1] );
+  }
+
+  if ( apiInfo ) {
+    pthread_cond_destroy( &apiInfo->runnable_cv );
+    if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
+    if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
+    delete apiInfo;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+void RtApiAlsa :: startStream()
+{
+  // This method calls snd_pcm_prepare if the device isn't already in that state.
+
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiAlsa::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+
+  int result = 0;
+  snd_pcm_state_t state;
+  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+  snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    state = snd_pcm_state( handle[0] );
+    if ( state != SND_PCM_STATE_PREPARED ) {
+      result = snd_pcm_prepare( handle[0] );
+      if ( result < 0 ) {
+        errorStream_ << "RtApiAlsa::startStream: error preparing output pcm device, " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+        goto unlock;
+      }
+    }
+  }
+
+  if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
+    result = snd_pcm_drop(handle[1]); // fix to remove stale data received since device has been open
+    state = snd_pcm_state( handle[1] );
+    if ( state != SND_PCM_STATE_PREPARED ) {
+      result = snd_pcm_prepare( handle[1] );
+      if ( result < 0 ) {
+        errorStream_ << "RtApiAlsa::startStream: error preparing input pcm device, " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+        goto unlock;
+      }
+    }
+  }
+
+  stream_.state = STREAM_RUNNING;
+
+ unlock:
+  apiInfo->runnable = true;
+  pthread_cond_signal( &apiInfo->runnable_cv );
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  if ( result >= 0 ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiAlsa :: stopStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiAlsa::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  stream_.state = STREAM_STOPPED;
+  MUTEX_LOCK( &stream_.mutex );
+
+  int result = 0;
+  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+  snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    if ( apiInfo->synchronized ) 
+      result = snd_pcm_drop( handle[0] );
+    else
+      result = snd_pcm_drain( handle[0] );
+    if ( result < 0 ) {
+      errorStream_ << "RtApiAlsa::stopStream: error draining output pcm device, " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
+    result = snd_pcm_drop( handle[1] );
+    if ( result < 0 ) {
+      errorStream_ << "RtApiAlsa::stopStream: error stopping input pcm device, " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+ unlock:
+  apiInfo->runnable = false; // fixes high CPU usage when stopped
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  if ( result >= 0 ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiAlsa :: abortStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiAlsa::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  stream_.state = STREAM_STOPPED;
+  MUTEX_LOCK( &stream_.mutex );
+
+  int result = 0;
+  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+  snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    result = snd_pcm_drop( handle[0] );
+    if ( result < 0 ) {
+      errorStream_ << "RtApiAlsa::abortStream: error aborting output pcm device, " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+  if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) {
+    result = snd_pcm_drop( handle[1] );
+    if ( result < 0 ) {
+      errorStream_ << "RtApiAlsa::abortStream: error aborting input pcm device, " << snd_strerror( result ) << ".";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+ unlock:
+  apiInfo->runnable = false; // fixes high CPU usage when stopped
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  if ( result >= 0 ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiAlsa :: callbackEvent()
+{
+  AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
+  if ( stream_.state == STREAM_STOPPED ) {
+    MUTEX_LOCK( &stream_.mutex );
+    while ( !apiInfo->runnable )
+      pthread_cond_wait( &apiInfo->runnable_cv, &stream_.mutex );
+
+    if ( stream_.state != STREAM_RUNNING ) {
+      MUTEX_UNLOCK( &stream_.mutex );
+      return;
+    }
+    MUTEX_UNLOCK( &stream_.mutex );
+  }
+
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiAlsa::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  int doStopStream = 0;
+  RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
+  double streamTime = getStreamTime();
+  RtAudioStreamStatus status = 0;
+  if ( stream_.mode != INPUT && apiInfo->xrun[0] == true ) {
+    status |= RTAUDIO_OUTPUT_UNDERFLOW;
+    apiInfo->xrun[0] = false;
+  }
+  if ( stream_.mode != OUTPUT && apiInfo->xrun[1] == true ) {
+    status |= RTAUDIO_INPUT_OVERFLOW;
+    apiInfo->xrun[1] = false;
+  }
+  doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                           stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
+
+  if ( doStopStream == 2 ) {
+    abortStream();
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+
+  // The state might change while waiting on a mutex.
+  if ( stream_.state == STREAM_STOPPED ) goto unlock;
+
+  int result;
+  char *buffer;
+  int channels;
+  snd_pcm_t **handle;
+  snd_pcm_sframes_t frames;
+  RtAudioFormat format;
+  handle = (snd_pcm_t **) apiInfo->handles;
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    // Setup parameters.
+    if ( stream_.doConvertBuffer[1] ) {
+      buffer = stream_.deviceBuffer;
+      channels = stream_.nDeviceChannels[1];
+      format = stream_.deviceFormat[1];
+    }
+    else {
+      buffer = stream_.userBuffer[1];
+      channels = stream_.nUserChannels[1];
+      format = stream_.userFormat;
+    }
+
+    // Read samples from device in interleaved/non-interleaved format.
+    if ( stream_.deviceInterleaved[1] )
+      result = snd_pcm_readi( handle[1], buffer, stream_.bufferSize );
+    else {
+      void *bufs[channels];
+      size_t offset = stream_.bufferSize * formatBytes( format );
+      for ( int i=0; i<channels; i++ )
+        bufs[i] = (void *) (buffer + (i * offset));
+      result = snd_pcm_readn( handle[1], bufs, stream_.bufferSize );
+    }
+
+    if ( result < (int) stream_.bufferSize ) {
+      // Either an error or overrun occured.
+      if ( result == -EPIPE ) {
+        snd_pcm_state_t state = snd_pcm_state( handle[1] );
+        if ( state == SND_PCM_STATE_XRUN ) {
+          apiInfo->xrun[1] = true;
+          result = snd_pcm_prepare( handle[1] );
+          if ( result < 0 ) {
+            errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after overrun, " << snd_strerror( result ) << ".";
+            errorText_ = errorStream_.str();
+          }
+        }
+        else {
+          errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
+          errorText_ = errorStream_.str();
+        }
+      }
+      else {
+        errorStream_ << "RtApiAlsa::callbackEvent: audio read error, " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+      }
+      error( RtAudioError::WARNING );
+      goto tryOutput;
+    }
+
+    // Do byte swapping if necessary.
+    if ( stream_.doByteSwap[1] )
+      byteSwapBuffer( buffer, stream_.bufferSize * channels, format );
+
+    // Do buffer conversion if necessary.
+    if ( stream_.doConvertBuffer[1] )
+      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+
+    // Check stream latency
+    result = snd_pcm_delay( handle[1], &frames );
+    if ( result == 0 && frames > 0 ) stream_.latency[1] = frames;
+  }
+
+ tryOutput:
+
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    // Setup parameters and do buffer conversion if necessary.
+    if ( stream_.doConvertBuffer[0] ) {
+      buffer = stream_.deviceBuffer;
+      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+      channels = stream_.nDeviceChannels[0];
+      format = stream_.deviceFormat[0];
+    }
+    else {
+      buffer = stream_.userBuffer[0];
+      channels = stream_.nUserChannels[0];
+      format = stream_.userFormat;
+    }
+
+    // Do byte swapping if necessary.
+    if ( stream_.doByteSwap[0] )
+      byteSwapBuffer(buffer, stream_.bufferSize * channels, format);
+
+    // Write samples to device in interleaved/non-interleaved format.
+    if ( stream_.deviceInterleaved[0] )
+      result = snd_pcm_writei( handle[0], buffer, stream_.bufferSize );
+    else {
+      void *bufs[channels];
+      size_t offset = stream_.bufferSize * formatBytes( format );
+      for ( int i=0; i<channels; i++ )
+        bufs[i] = (void *) (buffer + (i * offset));
+      result = snd_pcm_writen( handle[0], bufs, stream_.bufferSize );
+    }
+
+    if ( result < (int) stream_.bufferSize ) {
+      // Either an error or underrun occured.
+      if ( result == -EPIPE ) {
+        snd_pcm_state_t state = snd_pcm_state( handle[0] );
+        if ( state == SND_PCM_STATE_XRUN ) {
+          apiInfo->xrun[0] = true;
+          result = snd_pcm_prepare( handle[0] );
+          if ( result < 0 ) {
+            errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after underrun, " << snd_strerror( result ) << ".";
+            errorText_ = errorStream_.str();
+          }
+          else
+            errorText_ =  "RtApiAlsa::callbackEvent: audio write error, underrun.";
+        }
+        else {
+          errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
+          errorText_ = errorStream_.str();
+        }
+      }
+      else {
+        errorStream_ << "RtApiAlsa::callbackEvent: audio write error, " << snd_strerror( result ) << ".";
+        errorText_ = errorStream_.str();
+      }
+      error( RtAudioError::WARNING );
+      goto unlock;
+    }
+
+    // Check stream latency
+    result = snd_pcm_delay( handle[0], &frames );
+    if ( result == 0 && frames > 0 ) stream_.latency[0] = frames;
+  }
+
+ unlock:
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  RtApi::tickStreamTime();
+  if ( doStopStream == 1 ) this->stopStream();
+}
+
+static void *alsaCallbackHandler( void *ptr )
+{
+  CallbackInfo *info = (CallbackInfo *) ptr;
+  RtApiAlsa *object = (RtApiAlsa *) info->object;
+  bool *isRunning = &info->isRunning;
+
+#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
+  if ( &info->doRealtime ) {
+    pthread_t tID = pthread_self();	 // ID of this thread
+    sched_param prio = { info->priority }; // scheduling priority of thread
+    pthread_setschedparam( tID, SCHED_RR, &prio );
+  }
+#endif
+
+  while ( *isRunning == true ) {
+    pthread_testcancel();
+    object->callbackEvent();
+  }
+
+  pthread_exit( NULL );
+}
+
+//******************** End of __LINUX_ALSA__ *********************//
+#endif
+
+#if defined(__LINUX_PULSE__)
+
+// Code written by Peter Meerwald, pmeerw@pmeerw.net
+// and Tristan Matthews.
+
+#include <pulse/error.h>
+#include <pulse/simple.h>
+#include <cstdio>
+
+static const unsigned int SUPPORTED_SAMPLERATES[] = { 8000, 16000, 22050, 32000,
+                                                      44100, 48000, 96000, 0};
+
+struct rtaudio_pa_format_mapping_t {
+  RtAudioFormat rtaudio_format;
+  pa_sample_format_t pa_format;
+};
+
+static const rtaudio_pa_format_mapping_t supported_sampleformats[] = {
+  {RTAUDIO_SINT16, PA_SAMPLE_S16LE},
+  {RTAUDIO_SINT32, PA_SAMPLE_S32LE},
+  {RTAUDIO_FLOAT32, PA_SAMPLE_FLOAT32LE},
+  {0, PA_SAMPLE_INVALID}};
+
+struct PulseAudioHandle {
+  pa_simple *s_play;
+  pa_simple *s_rec;
+  pthread_t thread;
+  pthread_cond_t runnable_cv;
+  bool runnable;
+  PulseAudioHandle() : s_play(0), s_rec(0), runnable(false) { }
+};
+
+RtApiPulse::~RtApiPulse()
+{
+  if ( stream_.state != STREAM_CLOSED )
+    closeStream();
+}
+
+unsigned int RtApiPulse::getDeviceCount( void )
+{
+  return 1;
+}
+
+RtAudio::DeviceInfo RtApiPulse::getDeviceInfo( unsigned int /*device*/ )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = true;
+  info.name = "PulseAudio";
+  info.outputChannels = 2;
+  info.inputChannels = 2;
+  info.duplexChannels = 2;
+  info.isDefaultOutput = true;
+  info.isDefaultInput = true;
+
+  for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr )
+    info.sampleRates.push_back( *sr );
+
+  info.preferredSampleRate = 48000;
+  info.nativeFormats = RTAUDIO_SINT16 | RTAUDIO_SINT32 | RTAUDIO_FLOAT32;
+
+  return info;
+}
+
+static void *pulseaudio_callback( void * user )
+{
+  CallbackInfo *cbi = static_cast<CallbackInfo *>( user );
+  RtApiPulse *context = static_cast<RtApiPulse *>( cbi->object );
+  volatile bool *isRunning = &cbi->isRunning;
+
+  while ( *isRunning ) {
+    pthread_testcancel();
+    context->callbackEvent();
+  }
+
+  pthread_exit( NULL );
+}
+
+void RtApiPulse::closeStream( void )
+{
+  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+  stream_.callbackInfo.isRunning = false;
+  if ( pah ) {
+    MUTEX_LOCK( &stream_.mutex );
+    if ( stream_.state == STREAM_STOPPED ) {
+      pah->runnable = true;
+      pthread_cond_signal( &pah->runnable_cv );
+    }
+    MUTEX_UNLOCK( &stream_.mutex );
+
+    pthread_join( pah->thread, 0 );
+    if ( pah->s_play ) {
+      pa_simple_flush( pah->s_play, NULL );
+      pa_simple_free( pah->s_play );
+    }
+    if ( pah->s_rec )
+      pa_simple_free( pah->s_rec );
+
+    pthread_cond_destroy( &pah->runnable_cv );
+    delete pah;
+    stream_.apiHandle = 0;
+  }
+
+  if ( stream_.userBuffer[0] ) {
+    free( stream_.userBuffer[0] );
+    stream_.userBuffer[0] = 0;
+  }
+  if ( stream_.userBuffer[1] ) {
+    free( stream_.userBuffer[1] );
+    stream_.userBuffer[1] = 0;
+  }
+
+  stream_.state = STREAM_CLOSED;
+  stream_.mode = UNINITIALIZED;
+}
+
+void RtApiPulse::callbackEvent( void )
+{
+  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+  if ( stream_.state == STREAM_STOPPED ) {
+    MUTEX_LOCK( &stream_.mutex );
+    while ( !pah->runnable )
+      pthread_cond_wait( &pah->runnable_cv, &stream_.mutex );
+
+    if ( stream_.state != STREAM_RUNNING ) {
+      MUTEX_UNLOCK( &stream_.mutex );
+      return;
+    }
+    MUTEX_UNLOCK( &stream_.mutex );
+  }
+
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiPulse::callbackEvent(): the stream is closed ... "
+      "this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
+  double streamTime = getStreamTime();
+  RtAudioStreamStatus status = 0;
+  int doStopStream = callback( stream_.userBuffer[OUTPUT], stream_.userBuffer[INPUT],
+                               stream_.bufferSize, streamTime, status,
+                               stream_.callbackInfo.userData );
+
+  if ( doStopStream == 2 ) {
+    abortStream();
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+  void *pulse_in = stream_.doConvertBuffer[INPUT] ? stream_.deviceBuffer : stream_.userBuffer[INPUT];
+  void *pulse_out = stream_.doConvertBuffer[OUTPUT] ? stream_.deviceBuffer : stream_.userBuffer[OUTPUT];
+
+  if ( stream_.state != STREAM_RUNNING )
+    goto unlock;
+
+  int pa_error;
+  size_t bytes;
+  if (stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    if ( stream_.doConvertBuffer[OUTPUT] ) {
+        convertBuffer( stream_.deviceBuffer,
+                       stream_.userBuffer[OUTPUT],
+                       stream_.convertInfo[OUTPUT] );
+        bytes = stream_.nDeviceChannels[OUTPUT] * stream_.bufferSize *
+                formatBytes( stream_.deviceFormat[OUTPUT] );
+    } else
+        bytes = stream_.nUserChannels[OUTPUT] * stream_.bufferSize *
+                formatBytes( stream_.userFormat );
+
+    if ( pa_simple_write( pah->s_play, pulse_out, bytes, &pa_error ) < 0 ) {
+      errorStream_ << "RtApiPulse::callbackEvent: audio write error, " <<
+        pa_strerror( pa_error ) << ".";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+    }
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX) {
+    if ( stream_.doConvertBuffer[INPUT] )
+      bytes = stream_.nDeviceChannels[INPUT] * stream_.bufferSize *
+        formatBytes( stream_.deviceFormat[INPUT] );
+    else
+      bytes = stream_.nUserChannels[INPUT] * stream_.bufferSize *
+        formatBytes( stream_.userFormat );
+            
+    if ( pa_simple_read( pah->s_rec, pulse_in, bytes, &pa_error ) < 0 ) {
+      errorStream_ << "RtApiPulse::callbackEvent: audio read error, " <<
+        pa_strerror( pa_error ) << ".";
+      errorText_ = errorStream_.str();
+      error( RtAudioError::WARNING );
+    }
+    if ( stream_.doConvertBuffer[INPUT] ) {
+      convertBuffer( stream_.userBuffer[INPUT],
+                     stream_.deviceBuffer,
+                     stream_.convertInfo[INPUT] );
+    }
+  }
+
+ unlock:
+  MUTEX_UNLOCK( &stream_.mutex );
+  RtApi::tickStreamTime();
+
+  if ( doStopStream == 1 )
+    stopStream();
+}
+
+void RtApiPulse::startStream( void )
+{
+  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiPulse::startStream(): the stream is not open!";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiPulse::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+
+  stream_.state = STREAM_RUNNING;
+
+  pah->runnable = true;
+  pthread_cond_signal( &pah->runnable_cv );
+  MUTEX_UNLOCK( &stream_.mutex );
+}
+
+void RtApiPulse::stopStream( void )
+{
+  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiPulse::stopStream(): the stream is not open!";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiPulse::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  stream_.state = STREAM_STOPPED;
+  MUTEX_LOCK( &stream_.mutex );
+
+  if ( pah && pah->s_play ) {
+    int pa_error;
+    if ( pa_simple_drain( pah->s_play, &pa_error ) < 0 ) {
+      errorStream_ << "RtApiPulse::stopStream: error draining output device, " <<
+        pa_strerror( pa_error ) << ".";
+      errorText_ = errorStream_.str();
+      MUTEX_UNLOCK( &stream_.mutex );
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  stream_.state = STREAM_STOPPED;
+  MUTEX_UNLOCK( &stream_.mutex );
+}
+
+void RtApiPulse::abortStream( void )
+{
+  PulseAudioHandle *pah = static_cast<PulseAudioHandle*>( stream_.apiHandle );
+
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiPulse::abortStream(): the stream is not open!";
+    error( RtAudioError::INVALID_USE );
+    return;
+  }
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiPulse::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  stream_.state = STREAM_STOPPED;
+  MUTEX_LOCK( &stream_.mutex );
+
+  if ( pah && pah->s_play ) {
+    int pa_error;
+    if ( pa_simple_flush( pah->s_play, &pa_error ) < 0 ) {
+      errorStream_ << "RtApiPulse::abortStream: error flushing output device, " <<
+        pa_strerror( pa_error ) << ".";
+      errorText_ = errorStream_.str();
+      MUTEX_UNLOCK( &stream_.mutex );
+      error( RtAudioError::SYSTEM_ERROR );
+      return;
+    }
+  }
+
+  stream_.state = STREAM_STOPPED;
+  MUTEX_UNLOCK( &stream_.mutex );
+}
+
+bool RtApiPulse::probeDeviceOpen( unsigned int device, StreamMode mode,
+                                  unsigned int channels, unsigned int firstChannel,
+                                  unsigned int sampleRate, RtAudioFormat format,
+                                  unsigned int *bufferSize, RtAudio::StreamOptions *options )
+{
+  PulseAudioHandle *pah = 0;
+  unsigned long bufferBytes = 0;
+  pa_sample_spec ss;
+
+  if ( device != 0 ) return false;
+  if ( mode != INPUT && mode != OUTPUT ) return false;
+  if ( channels != 1 && channels != 2 ) {
+    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported number of channels.";
+    return false;
+  }
+  ss.channels = channels;
+
+  if ( firstChannel != 0 ) return false;
+
+  bool sr_found = false;
+  for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr ) {
+    if ( sampleRate == *sr ) {
+      sr_found = true;
+      stream_.sampleRate = sampleRate;
+      ss.rate = sampleRate;
+      break;
+    }
+  }
+  if ( !sr_found ) {
+    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported sample rate.";
+    return false;
+  }
+
+  bool sf_found = 0;
+  for ( const rtaudio_pa_format_mapping_t *sf = supported_sampleformats;
+        sf->rtaudio_format && sf->pa_format != PA_SAMPLE_INVALID; ++sf ) {
+    if ( format == sf->rtaudio_format ) {
+      sf_found = true;
+      stream_.userFormat = sf->rtaudio_format;
+      stream_.deviceFormat[mode] = stream_.userFormat;
+      ss.format = sf->pa_format;
+      break;
+    }
+  }
+  if ( !sf_found ) { // Use internal data format conversion.
+    stream_.userFormat = format;
+    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
+    ss.format = PA_SAMPLE_FLOAT32LE;
+  }
+
+  // Set other stream parameters.
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
+  else stream_.userInterleaved = true;
+  stream_.deviceInterleaved[mode] = true;
+  stream_.nBuffers = 1;
+  stream_.doByteSwap[mode] = false;
+  stream_.nUserChannels[mode] = channels;
+  stream_.nDeviceChannels[mode] = channels + firstChannel;
+  stream_.channelOffset[mode] = 0;
+  std::string streamName = "RtAudio";
+
+  // Set flags for buffer conversion.
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate necessary internal buffers.
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiPulse::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+  stream_.bufferSize = *bufferSize;
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( mode == INPUT ) {
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+        if ( bufferBytes <= bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiPulse::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  stream_.device[mode] = device;
+
+  // Setup the buffer conversion information structure.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+  if ( !stream_.apiHandle ) {
+    PulseAudioHandle *pah = new PulseAudioHandle;
+    if ( !pah ) {
+      errorText_ = "RtApiPulse::probeDeviceOpen: error allocating memory for handle.";
+      goto error;
+    }
+
+    stream_.apiHandle = pah;
+    if ( pthread_cond_init( &pah->runnable_cv, NULL ) != 0 ) {
+      errorText_ = "RtApiPulse::probeDeviceOpen: error creating condition variable.";
+      goto error;
+    }
+  }
+  pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
+
+  int error;
+  if ( options && !options->streamName.empty() ) streamName = options->streamName;
+  switch ( mode ) {
+  case INPUT:
+    pa_buffer_attr buffer_attr;
+    buffer_attr.fragsize = bufferBytes;
+    buffer_attr.maxlength = -1;
+
+    pah->s_rec = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_RECORD, NULL, "Record", &ss, NULL, &buffer_attr, &error );
+    if ( !pah->s_rec ) {
+      errorText_ = "RtApiPulse::probeDeviceOpen: error connecting input to PulseAudio server.";
+      goto error;
+    }
+    break;
+  case OUTPUT:
+    pah->s_play = pa_simple_new( NULL, "RtAudio", PA_STREAM_PLAYBACK, NULL, "Playback", &ss, NULL, NULL, &error );
+    if ( !pah->s_play ) {
+      errorText_ = "RtApiPulse::probeDeviceOpen: error connecting output to PulseAudio server.";
+      goto error;
+    }
+    break;
+  default:
+    goto error;
+  }
+
+  if ( stream_.mode == UNINITIALIZED )
+    stream_.mode = mode;
+  else if ( stream_.mode == mode )
+    goto error;
+  else
+    stream_.mode = DUPLEX;
+
+  if ( !stream_.callbackInfo.isRunning ) {
+    stream_.callbackInfo.object = this;
+    stream_.callbackInfo.isRunning = true;
+    if ( pthread_create( &pah->thread, NULL, pulseaudio_callback, (void *)&stream_.callbackInfo) != 0 ) {
+      errorText_ = "RtApiPulse::probeDeviceOpen: error creating thread.";
+      goto error;
+    }
+  }
+
+  stream_.state = STREAM_STOPPED;
+  return true;
+ 
+ error:
+  if ( pah && stream_.callbackInfo.isRunning ) {
+    pthread_cond_destroy( &pah->runnable_cv );
+    delete pah;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  return FAILURE;
+}
+
+//******************** End of __LINUX_PULSE__ *********************//
+#endif
+
+#if defined(__LINUX_OSS__)
+
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/soundcard.h>
+#include <errno.h>
+#include <math.h>
+
+static void *ossCallbackHandler(void * ptr);
+
+// A structure to hold various information related to the OSS API
+// implementation.
+struct OssHandle {
+  int id[2];    // device ids
+  bool xrun[2];
+  bool triggered;
+  pthread_cond_t runnable;
+
+  OssHandle()
+    :triggered(false) { id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; }
+};
+
+RtApiOss :: RtApiOss()
+{
+  // Nothing to do here.
+}
+
+RtApiOss :: ~RtApiOss()
+{
+  if ( stream_.state != STREAM_CLOSED ) closeStream();
+}
+
+unsigned int RtApiOss :: getDeviceCount( void )
+{
+  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
+  if ( mixerfd == -1 ) {
+    errorText_ = "RtApiOss::getDeviceCount: error opening '/dev/mixer'.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  oss_sysinfo sysinfo;
+  if ( ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo ) == -1 ) {
+    close( mixerfd );
+    errorText_ = "RtApiOss::getDeviceCount: error getting sysinfo, OSS version >= 4.0 is required.";
+    error( RtAudioError::WARNING );
+    return 0;
+  }
+
+  close( mixerfd );
+  return sysinfo.numaudios;
+}
+
+RtAudio::DeviceInfo RtApiOss :: getDeviceInfo( unsigned int device )
+{
+  RtAudio::DeviceInfo info;
+  info.probed = false;
+
+  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
+  if ( mixerfd == -1 ) {
+    errorText_ = "RtApiOss::getDeviceInfo: error opening '/dev/mixer'.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  oss_sysinfo sysinfo;
+  int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
+  if ( result == -1 ) {
+    close( mixerfd );
+    errorText_ = "RtApiOss::getDeviceInfo: error getting sysinfo, OSS version >= 4.0 is required.";
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  unsigned nDevices = sysinfo.numaudios;
+  if ( nDevices == 0 ) {
+    close( mixerfd );
+    errorText_ = "RtApiOss::getDeviceInfo: no devices found!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  if ( device >= nDevices ) {
+    close( mixerfd );
+    errorText_ = "RtApiOss::getDeviceInfo: device ID is invalid!";
+    error( RtAudioError::INVALID_USE );
+    return info;
+  }
+
+  oss_audioinfo ainfo;
+  ainfo.dev = device;
+  result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
+  close( mixerfd );
+  if ( result == -1 ) {
+    errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Probe channels
+  if ( ainfo.caps & PCM_CAP_OUTPUT ) info.outputChannels = ainfo.max_channels;
+  if ( ainfo.caps & PCM_CAP_INPUT ) info.inputChannels = ainfo.max_channels;
+  if ( ainfo.caps & PCM_CAP_DUPLEX ) {
+    if ( info.outputChannels > 0 && info.inputChannels > 0 && ainfo.caps & PCM_CAP_DUPLEX )
+      info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
+  }
+
+  // Probe data formats ... do for input
+  unsigned long mask = ainfo.iformats;
+  if ( mask & AFMT_S16_LE || mask & AFMT_S16_BE )
+    info.nativeFormats |= RTAUDIO_SINT16;
+  if ( mask & AFMT_S8 )
+    info.nativeFormats |= RTAUDIO_SINT8;
+  if ( mask & AFMT_S32_LE || mask & AFMT_S32_BE )
+    info.nativeFormats |= RTAUDIO_SINT32;
+  if ( mask & AFMT_FLOAT )
+    info.nativeFormats |= RTAUDIO_FLOAT32;
+  if ( mask & AFMT_S24_LE || mask & AFMT_S24_BE )
+    info.nativeFormats |= RTAUDIO_SINT24;
+
+  // Check that we have at least one supported format
+  if ( info.nativeFormats == 0 ) {
+    errorStream_ << "RtApiOss::getDeviceInfo: device (" << ainfo.name << ") data format not supported by RtAudio.";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+    return info;
+  }
+
+  // Probe the supported sample rates.
+  info.sampleRates.clear();
+  if ( ainfo.nrates ) {
+    for ( unsigned int i=0; i<ainfo.nrates; i++ ) {
+      for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+        if ( ainfo.rates[i] == SAMPLE_RATES[k] ) {
+          info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+          if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+            info.preferredSampleRate = SAMPLE_RATES[k];
+
+          break;
+        }
+      }
+    }
+  }
+  else {
+    // Check min and max rate values;
+    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
+      if ( ainfo.min_rate <= (int) SAMPLE_RATES[k] && ainfo.max_rate >= (int) SAMPLE_RATES[k] ) {
+        info.sampleRates.push_back( SAMPLE_RATES[k] );
+
+        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
+          info.preferredSampleRate = SAMPLE_RATES[k];
+      }
+    }
+  }
+
+  if ( info.sampleRates.size() == 0 ) {
+    errorStream_ << "RtApiOss::getDeviceInfo: no supported sample rates found for device (" << ainfo.name << ").";
+    errorText_ = errorStream_.str();
+    error( RtAudioError::WARNING );
+  }
+  else {
+    info.probed = true;
+    info.name = ainfo.name;
+  }
+
+  return info;
+}
+
+
+bool RtApiOss :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
+                                  unsigned int firstChannel, unsigned int sampleRate,
+                                  RtAudioFormat format, unsigned int *bufferSize,
+                                  RtAudio::StreamOptions *options )
+{
+  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
+  if ( mixerfd == -1 ) {
+    errorText_ = "RtApiOss::probeDeviceOpen: error opening '/dev/mixer'.";
+    return FAILURE;
+  }
+
+  oss_sysinfo sysinfo;
+  int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
+  if ( result == -1 ) {
+    close( mixerfd );
+    errorText_ = "RtApiOss::probeDeviceOpen: error getting sysinfo, OSS version >= 4.0 is required.";
+    return FAILURE;
+  }
+
+  unsigned nDevices = sysinfo.numaudios;
+  if ( nDevices == 0 ) {
+    // This should not happen because a check is made before this function is called.
+    close( mixerfd );
+    errorText_ = "RtApiOss::probeDeviceOpen: no devices found!";
+    return FAILURE;
+  }
+
+  if ( device >= nDevices ) {
+    // This should not happen because a check is made before this function is called.
+    close( mixerfd );
+    errorText_ = "RtApiOss::probeDeviceOpen: device ID is invalid!";
+    return FAILURE;
+  }
+
+  oss_audioinfo ainfo;
+  ainfo.dev = device;
+  result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
+  close( mixerfd );
+  if ( result == -1 ) {
+    errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Check if device supports input or output
+  if ( ( mode == OUTPUT && !( ainfo.caps & PCM_CAP_OUTPUT ) ) ||
+       ( mode == INPUT && !( ainfo.caps & PCM_CAP_INPUT ) ) ) {
+    if ( mode == OUTPUT )
+      errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support output.";
+    else
+      errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support input.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  int flags = 0;
+  OssHandle *handle = (OssHandle *) stream_.apiHandle;
+  if ( mode == OUTPUT )
+    flags |= O_WRONLY;
+  else { // mode == INPUT
+    if (stream_.mode == OUTPUT && stream_.device[0] == device) {
+      // We just set the same device for playback ... close and reopen for duplex (OSS only).
+      close( handle->id[0] );
+      handle->id[0] = 0;
+      if ( !( ainfo.caps & PCM_CAP_DUPLEX ) ) {
+        errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support duplex mode.";
+        errorText_ = errorStream_.str();
+        return FAILURE;
+      }
+      // Check that the number previously set channels is the same.
+      if ( stream_.nUserChannels[0] != channels ) {
+        errorStream_ << "RtApiOss::probeDeviceOpen: input/output channels must be equal for OSS duplex device (" << ainfo.name << ").";
+        errorText_ = errorStream_.str();
+        return FAILURE;
+      }
+      flags |= O_RDWR;
+    }
+    else
+      flags |= O_RDONLY;
+  }
+
+  // Set exclusive access if specified.
+  if ( options && options->flags & RTAUDIO_HOG_DEVICE ) flags |= O_EXCL;
+
+  // Try to open the device.
+  int fd;
+  fd = open( ainfo.devnode, flags, 0 );
+  if ( fd == -1 ) {
+    if ( errno == EBUSY )
+      errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") is busy.";
+    else
+      errorStream_ << "RtApiOss::probeDeviceOpen: error opening device (" << ainfo.name << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // For duplex operation, specifically set this mode (this doesn't seem to work).
+  /*
+    if ( flags | O_RDWR ) {
+    result = ioctl( fd, SNDCTL_DSP_SETDUPLEX, NULL );
+    if ( result == -1) {
+    errorStream_ << "RtApiOss::probeDeviceOpen: error setting duplex mode for device (" << ainfo.name << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+    }
+    }
+  */
+
+  // Check the device channel support.
+  stream_.nUserChannels[mode] = channels;
+  if ( ainfo.max_channels < (int)(channels + firstChannel) ) {
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: the device (" << ainfo.name << ") does not support requested channel parameters.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Set the number of channels.
+  int deviceChannels = channels + firstChannel;
+  result = ioctl( fd, SNDCTL_DSP_CHANNELS, &deviceChannels );
+  if ( result == -1 || deviceChannels < (int)(channels + firstChannel) ) {
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: error setting channel parameters on device (" << ainfo.name << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+  stream_.nDeviceChannels[mode] = deviceChannels;
+
+  // Get the data format mask
+  int mask;
+  result = ioctl( fd, SNDCTL_DSP_GETFMTS, &mask );
+  if ( result == -1 ) {
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: error getting device (" << ainfo.name << ") data formats.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Determine how to set the device format.
+  stream_.userFormat = format;
+  int deviceFormat = -1;
+  stream_.doByteSwap[mode] = false;
+  if ( format == RTAUDIO_SINT8 ) {
+    if ( mask & AFMT_S8 ) {
+      deviceFormat = AFMT_S8;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+    }
+  }
+  else if ( format == RTAUDIO_SINT16 ) {
+    if ( mask & AFMT_S16_NE ) {
+      deviceFormat = AFMT_S16_NE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+    }
+    else if ( mask & AFMT_S16_OE ) {
+      deviceFormat = AFMT_S16_OE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+      stream_.doByteSwap[mode] = true;
+    }
+  }
+  else if ( format == RTAUDIO_SINT24 ) {
+    if ( mask & AFMT_S24_NE ) {
+      deviceFormat = AFMT_S24_NE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+    }
+    else if ( mask & AFMT_S24_OE ) {
+      deviceFormat = AFMT_S24_OE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+      stream_.doByteSwap[mode] = true;
+    }
+  }
+  else if ( format == RTAUDIO_SINT32 ) {
+    if ( mask & AFMT_S32_NE ) {
+      deviceFormat = AFMT_S32_NE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+    }
+    else if ( mask & AFMT_S32_OE ) {
+      deviceFormat = AFMT_S32_OE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+      stream_.doByteSwap[mode] = true;
+    }
+  }
+
+  if ( deviceFormat == -1 ) {
+    // The user requested format is not natively supported by the device.
+    if ( mask & AFMT_S16_NE ) {
+      deviceFormat = AFMT_S16_NE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+    }
+    else if ( mask & AFMT_S32_NE ) {
+      deviceFormat = AFMT_S32_NE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+    }
+    else if ( mask & AFMT_S24_NE ) {
+      deviceFormat = AFMT_S24_NE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+    }
+    else if ( mask & AFMT_S16_OE ) {
+      deviceFormat = AFMT_S16_OE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT16;
+      stream_.doByteSwap[mode] = true;
+    }
+    else if ( mask & AFMT_S32_OE ) {
+      deviceFormat = AFMT_S32_OE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT32;
+      stream_.doByteSwap[mode] = true;
+    }
+    else if ( mask & AFMT_S24_OE ) {
+      deviceFormat = AFMT_S24_OE;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT24;
+      stream_.doByteSwap[mode] = true;
+    }
+    else if ( mask & AFMT_S8) {
+      deviceFormat = AFMT_S8;
+      stream_.deviceFormat[mode] = RTAUDIO_SINT8;
+    }
+  }
+
+  if ( stream_.deviceFormat[mode] == 0 ) {
+    // This really shouldn't happen ...
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") data format not supported by RtAudio.";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Set the data format.
+  int temp = deviceFormat;
+  result = ioctl( fd, SNDCTL_DSP_SETFMT, &deviceFormat );
+  if ( result == -1 || deviceFormat != temp ) {
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: error setting data format on device (" << ainfo.name << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Attempt to set the buffer size.  According to OSS, the minimum
+  // number of buffers is two.  The supposed minimum buffer size is 16
+  // bytes, so that will be our lower bound.  The argument to this
+  // call is in the form 0xMMMMSSSS (hex), where the buffer size (in
+  // bytes) is given as 2^SSSS and the number of buffers as 2^MMMM.
+  // We'll check the actual value used near the end of the setup
+  // procedure.
+  int ossBufferBytes = *bufferSize * formatBytes( stream_.deviceFormat[mode] ) * deviceChannels;
+  if ( ossBufferBytes < 16 ) ossBufferBytes = 16;
+  int buffers = 0;
+  if ( options ) buffers = options->numberOfBuffers;
+  if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) buffers = 2;
+  if ( buffers < 2 ) buffers = 3;
+  temp = ((int) buffers << 16) + (int)( log10( (double)ossBufferBytes ) / log10( 2.0 ) );
+  result = ioctl( fd, SNDCTL_DSP_SETFRAGMENT, &temp );
+  if ( result == -1 ) {
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: error setting buffer size on device (" << ainfo.name << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+  stream_.nBuffers = buffers;
+
+  // Save buffer size (in sample frames).
+  *bufferSize = ossBufferBytes / ( formatBytes(stream_.deviceFormat[mode]) * deviceChannels );
+  stream_.bufferSize = *bufferSize;
+
+  // Set the sample rate.
+  int srate = sampleRate;
+  result = ioctl( fd, SNDCTL_DSP_SPEED, &srate );
+  if ( result == -1 ) {
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: error setting sample rate (" << sampleRate << ") on device (" << ainfo.name << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+
+  // Verify the sample rate setup worked.
+  if ( abs( srate - sampleRate ) > 100 ) {
+    close( fd );
+    errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support sample rate (" << sampleRate << ").";
+    errorText_ = errorStream_.str();
+    return FAILURE;
+  }
+  stream_.sampleRate = sampleRate;
+
+  if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device) {
+    // We're doing duplex setup here.
+    stream_.deviceFormat[0] = stream_.deviceFormat[1];
+    stream_.nDeviceChannels[0] = deviceChannels;
+  }
+
+  // Set interleaving parameters.
+  stream_.userInterleaved = true;
+  stream_.deviceInterleaved[mode] =  true;
+  if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
+    stream_.userInterleaved = false;
+
+  // Set flags for buffer conversion
+  stream_.doConvertBuffer[mode] = false;
+  if ( stream_.userFormat != stream_.deviceFormat[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
+    stream_.doConvertBuffer[mode] = true;
+  if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
+       stream_.nUserChannels[mode] > 1 )
+    stream_.doConvertBuffer[mode] = true;
+
+  // Allocate the stream handles if necessary and then save.
+  if ( stream_.apiHandle == 0 ) {
+    try {
+      handle = new OssHandle;
+    }
+    catch ( std::bad_alloc& ) {
+      errorText_ = "RtApiOss::probeDeviceOpen: error allocating OssHandle memory.";
+      goto error;
+    }
+
+    if ( pthread_cond_init( &handle->runnable, NULL ) ) {
+      errorText_ = "RtApiOss::probeDeviceOpen: error initializing pthread condition variable.";
+      goto error;
+    }
+
+    stream_.apiHandle = (void *) handle;
+  }
+  else {
+    handle = (OssHandle *) stream_.apiHandle;
+  }
+  handle->id[mode] = fd;
+
+  // Allocate necessary internal buffers.
+  unsigned long bufferBytes;
+  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
+  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
+  if ( stream_.userBuffer[mode] == NULL ) {
+    errorText_ = "RtApiOss::probeDeviceOpen: error allocating user buffer memory.";
+    goto error;
+  }
+
+  if ( stream_.doConvertBuffer[mode] ) {
+
+    bool makeBuffer = true;
+    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
+    if ( mode == INPUT ) {
+      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
+        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
+        if ( bufferBytes <= bytesOut ) makeBuffer = false;
+      }
+    }
+
+    if ( makeBuffer ) {
+      bufferBytes *= *bufferSize;
+      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
+      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
+      if ( stream_.deviceBuffer == NULL ) {
+        errorText_ = "RtApiOss::probeDeviceOpen: error allocating device buffer memory.";
+        goto error;
+      }
+    }
+  }
+
+  stream_.device[mode] = device;
+  stream_.state = STREAM_STOPPED;
+
+  // Setup the buffer conversion information structure.
+  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
+
+  // Setup thread if necessary.
+  if ( stream_.mode == OUTPUT && mode == INPUT ) {
+    // We had already set up an output stream.
+    stream_.mode = DUPLEX;
+    if ( stream_.device[0] == device ) handle->id[0] = fd;
+  }
+  else {
+    stream_.mode = mode;
+
+    // Setup callback thread.
+    stream_.callbackInfo.object = (void *) this;
+
+    // Set the thread attributes for joinable and realtime scheduling
+    // priority.  The higher priority will only take affect if the
+    // program is run as root or suid.
+    pthread_attr_t attr;
+    pthread_attr_init( &attr );
+    pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
+#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
+    if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
+      struct sched_param param;
+      int priority = options->priority;
+      int min = sched_get_priority_min( SCHED_RR );
+      int max = sched_get_priority_max( SCHED_RR );
+      if ( priority < min ) priority = min;
+      else if ( priority > max ) priority = max;
+      param.sched_priority = priority;
+      pthread_attr_setschedparam( &attr, &param );
+      pthread_attr_setschedpolicy( &attr, SCHED_RR );
+    }
+    else
+      pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
+#else
+    pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
+#endif
+
+    stream_.callbackInfo.isRunning = true;
+    result = pthread_create( &stream_.callbackInfo.thread, &attr, ossCallbackHandler, &stream_.callbackInfo );
+    pthread_attr_destroy( &attr );
+    if ( result ) {
+      stream_.callbackInfo.isRunning = false;
+      errorText_ = "RtApiOss::error creating callback thread!";
+      goto error;
+    }
+  }
+
+  return SUCCESS;
+
+ error:
+  if ( handle ) {
+    pthread_cond_destroy( &handle->runnable );
+    if ( handle->id[0] ) close( handle->id[0] );
+    if ( handle->id[1] ) close( handle->id[1] );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  return FAILURE;
+}
+
+void RtApiOss :: closeStream()
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiOss::closeStream(): no open stream to close!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  OssHandle *handle = (OssHandle *) stream_.apiHandle;
+  stream_.callbackInfo.isRunning = false;
+  MUTEX_LOCK( &stream_.mutex );
+  if ( stream_.state == STREAM_STOPPED )
+    pthread_cond_signal( &handle->runnable );
+  MUTEX_UNLOCK( &stream_.mutex );
+  pthread_join( stream_.callbackInfo.thread, NULL );
+
+  if ( stream_.state == STREAM_RUNNING ) {
+    if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
+      ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
+    else
+      ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
+    stream_.state = STREAM_STOPPED;
+  }
+
+  if ( handle ) {
+    pthread_cond_destroy( &handle->runnable );
+    if ( handle->id[0] ) close( handle->id[0] );
+    if ( handle->id[1] ) close( handle->id[1] );
+    delete handle;
+    stream_.apiHandle = 0;
+  }
+
+  for ( int i=0; i<2; i++ ) {
+    if ( stream_.userBuffer[i] ) {
+      free( stream_.userBuffer[i] );
+      stream_.userBuffer[i] = 0;
+    }
+  }
+
+  if ( stream_.deviceBuffer ) {
+    free( stream_.deviceBuffer );
+    stream_.deviceBuffer = 0;
+  }
+
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+}
+
+void RtApiOss :: startStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_RUNNING ) {
+    errorText_ = "RtApiOss::startStream(): the stream is already running!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+
+  stream_.state = STREAM_RUNNING;
+
+  // No need to do anything else here ... OSS automatically starts
+  // when fed samples.
+
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  OssHandle *handle = (OssHandle *) stream_.apiHandle;
+  pthread_cond_signal( &handle->runnable );
+}
+
+void RtApiOss :: stopStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiOss::stopStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+
+  // The state might change while waiting on a mutex.
+  if ( stream_.state == STREAM_STOPPED ) {
+    MUTEX_UNLOCK( &stream_.mutex );
+    return;
+  }
+
+  int result = 0;
+  OssHandle *handle = (OssHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    // Flush the output with zeros a few times.
+    char *buffer;
+    int samples;
+    RtAudioFormat format;
+
+    if ( stream_.doConvertBuffer[0] ) {
+      buffer = stream_.deviceBuffer;
+      samples = stream_.bufferSize * stream_.nDeviceChannels[0];
+      format = stream_.deviceFormat[0];
+    }
+    else {
+      buffer = stream_.userBuffer[0];
+      samples = stream_.bufferSize * stream_.nUserChannels[0];
+      format = stream_.userFormat;
+    }
+
+    memset( buffer, 0, samples * formatBytes(format) );
+    for ( unsigned int i=0; i<stream_.nBuffers+1; i++ ) {
+      result = write( handle->id[0], buffer, samples * formatBytes(format) );
+      if ( result == -1 ) {
+        errorText_ = "RtApiOss::stopStream: audio write error.";
+        error( RtAudioError::WARNING );
+      }
+    }
+
+    result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
+    if ( result == -1 ) {
+      errorStream_ << "RtApiOss::stopStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+    handle->triggered = false;
+  }
+
+  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
+    result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
+    if ( result == -1 ) {
+      errorStream_ << "RtApiOss::stopStream: system error stopping input callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+ unlock:
+  stream_.state = STREAM_STOPPED;
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  if ( result != -1 ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiOss :: abortStream()
+{
+  verifyStream();
+  if ( stream_.state == STREAM_STOPPED ) {
+    errorText_ = "RtApiOss::abortStream(): the stream is already stopped!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+
+  // The state might change while waiting on a mutex.
+  if ( stream_.state == STREAM_STOPPED ) {
+    MUTEX_UNLOCK( &stream_.mutex );
+    return;
+  }
+
+  int result = 0;
+  OssHandle *handle = (OssHandle *) stream_.apiHandle;
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+    result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
+    if ( result == -1 ) {
+      errorStream_ << "RtApiOss::abortStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+    handle->triggered = false;
+  }
+
+  if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
+    result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
+    if ( result == -1 ) {
+      errorStream_ << "RtApiOss::abortStream: system error stopping input callback procedure on device (" << stream_.device[0] << ").";
+      errorText_ = errorStream_.str();
+      goto unlock;
+    }
+  }
+
+ unlock:
+  stream_.state = STREAM_STOPPED;
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  if ( result != -1 ) return;
+  error( RtAudioError::SYSTEM_ERROR );
+}
+
+void RtApiOss :: callbackEvent()
+{
+  OssHandle *handle = (OssHandle *) stream_.apiHandle;
+  if ( stream_.state == STREAM_STOPPED ) {
+    MUTEX_LOCK( &stream_.mutex );
+    pthread_cond_wait( &handle->runnable, &stream_.mutex );
+    if ( stream_.state != STREAM_RUNNING ) {
+      MUTEX_UNLOCK( &stream_.mutex );
+      return;
+    }
+    MUTEX_UNLOCK( &stream_.mutex );
+  }
+
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApiOss::callbackEvent(): the stream is closed ... this shouldn't happen!";
+    error( RtAudioError::WARNING );
+    return;
+  }
+
+  // Invoke user callback to get fresh output data.
+  int doStopStream = 0;
+  RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
+  double streamTime = getStreamTime();
+  RtAudioStreamStatus status = 0;
+  if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
+    status |= RTAUDIO_OUTPUT_UNDERFLOW;
+    handle->xrun[0] = false;
+  }
+  if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
+    status |= RTAUDIO_INPUT_OVERFLOW;
+    handle->xrun[1] = false;
+  }
+  doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
+                           stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
+  if ( doStopStream == 2 ) {
+    this->abortStream();
+    return;
+  }
+
+  MUTEX_LOCK( &stream_.mutex );
+
+  // The state might change while waiting on a mutex.
+  if ( stream_.state == STREAM_STOPPED ) goto unlock;
+
+  int result;
+  char *buffer;
+  int samples;
+  RtAudioFormat format;
+
+  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
+
+    // Setup parameters and do buffer conversion if necessary.
+    if ( stream_.doConvertBuffer[0] ) {
+      buffer = stream_.deviceBuffer;
+      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
+      samples = stream_.bufferSize * stream_.nDeviceChannels[0];
+      format = stream_.deviceFormat[0];
+    }
+    else {
+      buffer = stream_.userBuffer[0];
+      samples = stream_.bufferSize * stream_.nUserChannels[0];
+      format = stream_.userFormat;
+    }
+
+    // Do byte swapping if necessary.
+    if ( stream_.doByteSwap[0] )
+      byteSwapBuffer( buffer, samples, format );
+
+    if ( stream_.mode == DUPLEX && handle->triggered == false ) {
+      int trig = 0;
+      ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
+      result = write( handle->id[0], buffer, samples * formatBytes(format) );
+      trig = PCM_ENABLE_INPUT|PCM_ENABLE_OUTPUT;
+      ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
+      handle->triggered = true;
+    }
+    else
+      // Write samples to device.
+      result = write( handle->id[0], buffer, samples * formatBytes(format) );
+
+    if ( result == -1 ) {
+      // We'll assume this is an underrun, though there isn't a
+      // specific means for determining that.
+      handle->xrun[0] = true;
+      errorText_ = "RtApiOss::callbackEvent: audio write error.";
+      error( RtAudioError::WARNING );
+      // Continue on to input section.
+    }
+  }
+
+  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
+
+    // Setup parameters.
+    if ( stream_.doConvertBuffer[1] ) {
+      buffer = stream_.deviceBuffer;
+      samples = stream_.bufferSize * stream_.nDeviceChannels[1];
+      format = stream_.deviceFormat[1];
+    }
+    else {
+      buffer = stream_.userBuffer[1];
+      samples = stream_.bufferSize * stream_.nUserChannels[1];
+      format = stream_.userFormat;
+    }
+
+    // Read samples from device.
+    result = read( handle->id[1], buffer, samples * formatBytes(format) );
+
+    if ( result == -1 ) {
+      // We'll assume this is an overrun, though there isn't a
+      // specific means for determining that.
+      handle->xrun[1] = true;
+      errorText_ = "RtApiOss::callbackEvent: audio read error.";
+      error( RtAudioError::WARNING );
+      goto unlock;
+    }
+
+    // Do byte swapping if necessary.
+    if ( stream_.doByteSwap[1] )
+      byteSwapBuffer( buffer, samples, format );
+
+    // Do buffer conversion if necessary.
+    if ( stream_.doConvertBuffer[1] )
+      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
+  }
+
+ unlock:
+  MUTEX_UNLOCK( &stream_.mutex );
+
+  RtApi::tickStreamTime();
+  if ( doStopStream == 1 ) this->stopStream();
+}
+
+static void *ossCallbackHandler( void *ptr )
+{
+  CallbackInfo *info = (CallbackInfo *) ptr;
+  RtApiOss *object = (RtApiOss *) info->object;
+  bool *isRunning = &info->isRunning;
+
+  while ( *isRunning == true ) {
+    pthread_testcancel();
+    object->callbackEvent();
+  }
+
+  pthread_exit( NULL );
+}
+
+//******************** End of __LINUX_OSS__ *********************//
+#endif
+
+
+// *************************************************** //
+//
+// Protected common (OS-independent) RtAudio methods.
+//
+// *************************************************** //
+
+// This method can be modified to control the behavior of error
+// message printing.
+void RtApi :: error( RtAudioError::Type type )
+{
+  errorStream_.str(""); // clear the ostringstream
+
+  RtAudioErrorCallback errorCallback = (RtAudioErrorCallback) stream_.callbackInfo.errorCallback;
+  if ( errorCallback ) {
+    // abortStream() can generate new error messages. Ignore them. Just keep original one.
+
+    if ( firstErrorOccurred_ )
+      return;
+
+    firstErrorOccurred_ = true;
+    const std::string errorMessage = errorText_;
+
+    if ( type != RtAudioError::WARNING && stream_.state != STREAM_STOPPED) {
+      stream_.callbackInfo.isRunning = false; // exit from the thread
+      abortStream();
+    }
+
+    errorCallback( type, errorMessage );
+    firstErrorOccurred_ = false;
+    return;
+  }
+
+  if ( type == RtAudioError::WARNING && showWarnings_ == true )
+    std::cerr << '\n' << errorText_ << "\n\n";
+  else if ( type != RtAudioError::WARNING )
+    throw( RtAudioError( errorText_, type ) );
+}
+
+void RtApi :: verifyStream()
+{
+  if ( stream_.state == STREAM_CLOSED ) {
+    errorText_ = "RtApi:: a stream is not open!";
+    error( RtAudioError::INVALID_USE );
+  }
+}
+
+void RtApi :: clearStreamInfo()
+{
+  stream_.mode = UNINITIALIZED;
+  stream_.state = STREAM_CLOSED;
+  stream_.sampleRate = 0;
+  stream_.bufferSize = 0;
+  stream_.nBuffers = 0;
+  stream_.userFormat = 0;
+  stream_.userInterleaved = true;
+  stream_.streamTime = 0.0;
+  stream_.apiHandle = 0;
+  stream_.deviceBuffer = 0;
+  stream_.callbackInfo.callback = 0;
+  stream_.callbackInfo.userData = 0;
+  stream_.callbackInfo.isRunning = false;
+  stream_.callbackInfo.errorCallback = 0;
+  for ( int i=0; i<2; i++ ) {
+    stream_.device[i] = 11111;
+    stream_.doConvertBuffer[i] = false;
+    stream_.deviceInterleaved[i] = true;
+    stream_.doByteSwap[i] = false;
+    stream_.nUserChannels[i] = 0;
+    stream_.nDeviceChannels[i] = 0;
+    stream_.channelOffset[i] = 0;
+    stream_.deviceFormat[i] = 0;
+    stream_.latency[i] = 0;
+    stream_.userBuffer[i] = 0;
+    stream_.convertInfo[i].channels = 0;
+    stream_.convertInfo[i].inJump = 0;
+    stream_.convertInfo[i].outJump = 0;
+    stream_.convertInfo[i].inFormat = 0;
+    stream_.convertInfo[i].outFormat = 0;
+    stream_.convertInfo[i].inOffset.clear();
+    stream_.convertInfo[i].outOffset.clear();
+  }
+}
+
+unsigned int RtApi :: formatBytes( RtAudioFormat format )
+{
+  if ( format == RTAUDIO_SINT16 )
+    return 2;
+  else if ( format == RTAUDIO_SINT32 || format == RTAUDIO_FLOAT32 )
+    return 4;
+  else if ( format == RTAUDIO_FLOAT64 )
+    return 8;
+  else if ( format == RTAUDIO_SINT24 )
+    return 3;
+  else if ( format == RTAUDIO_SINT8 )
+    return 1;
+
+  errorText_ = "RtApi::formatBytes: undefined format.";
+  error( RtAudioError::WARNING );
+
+  return 0;
+}
+
+void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel )
+{
+  if ( mode == INPUT ) { // convert device to user buffer
+    stream_.convertInfo[mode].inJump = stream_.nDeviceChannels[1];
+    stream_.convertInfo[mode].outJump = stream_.nUserChannels[1];
+    stream_.convertInfo[mode].inFormat = stream_.deviceFormat[1];
+    stream_.convertInfo[mode].outFormat = stream_.userFormat;
+  }
+  else { // convert user to device buffer
+    stream_.convertInfo[mode].inJump = stream_.nUserChannels[0];
+    stream_.convertInfo[mode].outJump = stream_.nDeviceChannels[0];
+    stream_.convertInfo[mode].inFormat = stream_.userFormat;
+    stream_.convertInfo[mode].outFormat = stream_.deviceFormat[0];
+  }
+
+  if ( stream_.convertInfo[mode].inJump < stream_.convertInfo[mode].outJump )
+    stream_.convertInfo[mode].channels = stream_.convertInfo[mode].inJump;
+  else
+    stream_.convertInfo[mode].channels = stream_.convertInfo[mode].outJump;
+
+  // Set up the interleave/deinterleave offsets.
+  if ( stream_.deviceInterleaved[mode] != stream_.userInterleaved ) {
+    if ( ( mode == OUTPUT && stream_.deviceInterleaved[mode] ) ||
+         ( mode == INPUT && stream_.userInterleaved ) ) {
+      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
+        stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize );
+        stream_.convertInfo[mode].outOffset.push_back( k );
+        stream_.convertInfo[mode].inJump = 1;
+      }
+    }
+    else {
+      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
+        stream_.convertInfo[mode].inOffset.push_back( k );
+        stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize );
+        stream_.convertInfo[mode].outJump = 1;
+      }
+    }
+  }
+  else { // no (de)interleaving
+    if ( stream_.userInterleaved ) {
+      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
+        stream_.convertInfo[mode].inOffset.push_back( k );
+        stream_.convertInfo[mode].outOffset.push_back( k );
+      }
+    }
+    else {
+      for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
+        stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize );
+        stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize );
+        stream_.convertInfo[mode].inJump = 1;
+        stream_.convertInfo[mode].outJump = 1;
+      }
+    }
+  }
+
+  // Add channel offset.
+  if ( firstChannel > 0 ) {
+    if ( stream_.deviceInterleaved[mode] ) {
+      if ( mode == OUTPUT ) {
+        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
+          stream_.convertInfo[mode].outOffset[k] += firstChannel;
+      }
+      else {
+        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
+          stream_.convertInfo[mode].inOffset[k] += firstChannel;
+      }
+    }
+    else {
+      if ( mode == OUTPUT ) {
+        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
+          stream_.convertInfo[mode].outOffset[k] += ( firstChannel * stream_.bufferSize );
+      }
+      else {
+        for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
+          stream_.convertInfo[mode].inOffset[k] += ( firstChannel  * stream_.bufferSize );
+      }
+    }
+  }
+}
+
+void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info )
+{
+  // This function does format conversion, input/output channel compensation, and
+  // data interleaving/deinterleaving.  24-bit integers are assumed to occupy
+  // the lower three bytes of a 32-bit integer.
+
+  // Clear our device buffer when in/out duplex device channels are different
+  if ( outBuffer == stream_.deviceBuffer && stream_.mode == DUPLEX &&
+       ( stream_.nDeviceChannels[0] < stream_.nDeviceChannels[1] ) )
+    memset( outBuffer, 0, stream_.bufferSize * info.outJump * formatBytes( info.outFormat ) );
+
+  int j;
+  if (info.outFormat == RTAUDIO_FLOAT64) {
+    Float64 scale;
+    Float64 *out = (Float64 *)outBuffer;
+
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      scale = 1.0 / 127.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      scale = 1.0 / 32767.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      scale = 1.0 / 8388607.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) (in[info.inOffset[j]].asInt());
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      scale = 1.0 / 2147483647.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      // Channel compensation and/or (de)interleaving only.
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_FLOAT32) {
+    Float32 scale;
+    Float32 *out = (Float32 *)outBuffer;
+
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      scale = (Float32) ( 1.0 / 127.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      scale = (Float32) ( 1.0 / 32767.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      scale = (Float32) ( 1.0 / 8388607.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) (in[info.inOffset[j]].asInt());
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      scale = (Float32) ( 1.0 / 2147483647.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      // Channel compensation and/or (de)interleaving only.
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT32) {
+    Int32 *out = (Int32 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 24;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 16;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]].asInt();
+          out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      // Channel compensation and/or (de)interleaving only.
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT24) {
+    Int24 *out = (Int24 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 16);
+          //out[info.outOffset[j]] <<= 16;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 8);
+          //out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      // Channel compensation and/or (de)interleaving only.
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] >> 8);
+          //out[info.outOffset[j]] >>= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT16) {
+    Int16 *out = (Int16 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      // Channel compensation and/or (de)interleaving only.
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]].asInt() >> 8);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) ((in[info.inOffset[j]] >> 16) & 0x0000ffff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT8) {
+    signed char *out = (signed char *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      // Channel compensation and/or (de)interleaving only.
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 8) & 0x00ff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]].asInt() >> 16);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 24) & 0x000000ff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+}
+
+//static inline uint16_t bswap_16(uint16_t x) { return (x>>8) | (x<<8); }
+//static inline uint32_t bswap_32(uint32_t x) { return (bswap_16(x&0xffff)<<16) | (bswap_16(x>>16)); }
+//static inline uint64_t bswap_64(uint64_t x) { return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32)); }
+
+void RtApi :: byteSwapBuffer( char *buffer, unsigned int samples, RtAudioFormat format )
+{
+  register char val;
+  register char *ptr;
+
+  ptr = buffer;
+  if ( format == RTAUDIO_SINT16 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 2nd bytes.
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 2 bytes.
+      ptr += 2;
+    }
+  }
+  else if ( format == RTAUDIO_SINT32 ||
+            format == RTAUDIO_FLOAT32 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 4th bytes.
+      val = *(ptr);
+      *(ptr) = *(ptr+3);
+      *(ptr+3) = val;
+
+      // Swap 2nd and 3rd bytes.
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 3 more bytes.
+      ptr += 3;
+    }
+  }
+  else if ( format == RTAUDIO_SINT24 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 3rd bytes.
+      val = *(ptr);
+      *(ptr) = *(ptr+2);
+      *(ptr+2) = val;
+
+      // Increment 2 more bytes.
+      ptr += 2;
+    }
+  }
+  else if ( format == RTAUDIO_FLOAT64 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 8th bytes
+      val = *(ptr);
+      *(ptr) = *(ptr+7);
+      *(ptr+7) = val;
+
+      // Swap 2nd and 7th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+5);
+      *(ptr+5) = val;
+
+      // Swap 3rd and 6th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+3);
+      *(ptr+3) = val;
+
+      // Swap 4th and 5th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 5 more bytes.
+      ptr += 5;
+    }
+  }
+}
+
+  // Indentation settings for Vim and Emacs
+  //
+  // Local Variables:
+  // c-basic-offset: 2
+  // indent-tabs-mode: nil
+  // End:
+  //
+  // vim: et sts=2 sw=2
+
+#endif
diff --git a/drivers/speex/audio_stream_speex.cpp b/drivers/speex/audio_stream_speex.cpp
index 2cffb17049..1bb4952cc8 100644
--- a/drivers/speex/audio_stream_speex.cpp
+++ b/drivers/speex/audio_stream_speex.cpp
@@ -15,14 +15,15 @@ static _FORCE_INLINE_ uint16_t le_short(uint16_t s)
 }
 
 
-void AudioStreamSpeex::update() {
+int AudioStreamPlaybackSpeex::mix(int16_t* p_buffer,int p_frames) {
+
+
 
-	_THREAD_SAFE_METHOD_;
 	//printf("update, loops %i, read ofs %i\n", (int)loops, read_ofs);
 	//printf("playing %i, paused %i\n", (int)playing, (int)paused);
 
-	if (!active || !playing || paused || !data.size())
-		return;
+	if (!active || !playing || !data.size())
+		return 0;
 
 	/*
 	if (read_ofs >= data.size()) {
@@ -35,12 +36,13 @@ void AudioStreamSpeex::update() {
 	};
 	*/
 
-	int todo = get_todo();
+	int todo = p_frames;
 	if (todo < page_size) {
-		return;
+		return 0;
 	};
 
-	int eos = 0;
+	int eos = 0;	
+	bool reloaded=false;
 
 	while (todo > page_size) {
 
@@ -92,7 +94,7 @@ void AudioStreamSpeex::update() {
 				for (int j=0;j!=nframes;j++)
 				{
 
-					int16_t* out = get_write_buffer();
+					int16_t* out = p_buffer;
 
 					int ret;
 					/*Decode frame*/
@@ -120,7 +122,7 @@ void AudioStreamSpeex::update() {
 
 
 					/*Convert to short and save to output file*/
-					for (int i=0;i<frame_size*get_channel_count();i++) {
+					for (int i=0;i<frame_size*stream_channels;i++) {
 						out[i]=le_short(out[i]);
 					}
 
@@ -149,7 +151,7 @@ void AudioStreamSpeex::update() {
 						}
 
 
-						write(new_frame_size);
+						p_buffer+=new_frame_size*stream_channels;
 						todo-=new_frame_size;
 					}
 				}
@@ -175,6 +177,7 @@ void AudioStreamSpeex::update() {
 				if (loops) {					
 					reload();
 					++loop_count;
+					//break;
 				} else {
 					playing=false;
 					unload();
@@ -183,18 +186,22 @@ void AudioStreamSpeex::update() {
 			}
 		};
 	};
+
+	return p_frames-todo;
 };
 
 
-void AudioStreamSpeex::unload() {
+void AudioStreamPlaybackSpeex::unload() {
+
 
-	_THREAD_SAFE_METHOD_
 
 	if (!active) return;
 
 	speex_bits_destroy(&bits);
 	if (st)
 		speex_decoder_destroy(st);
+
+	ogg_sync_clear(&oy);
 	active = false;
 	//data.resize(0);
 	st = NULL;
@@ -204,7 +211,7 @@ void AudioStreamSpeex::unload() {
 	loop_count = 0;
 }
 
-void *AudioStreamSpeex::process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers) {
+void *AudioStreamPlaybackSpeex::process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers) {
 
 	void *st;
 	SpeexHeader *header;
@@ -276,9 +283,9 @@ void *AudioStreamSpeex::process_header(ogg_packet *op, int *frame_size, int *rat
 
 
 
-void AudioStreamSpeex::reload() {
+void AudioStreamPlaybackSpeex::reload() {
+
 
-	_THREAD_SAFE_METHOD_
 
 	if (active)
 		unload();
@@ -359,8 +366,10 @@ void AudioStreamSpeex::reload() {
 					};
 
 					page_size = nframes * frame_size;
+					stream_srate=rate;
+					stream_channels=channels;
+					stream_minbuff_size=page_size;
 
-					_setup(channels, rate,page_size);
 
 				} else if (packet_count==1)
 				{
@@ -374,23 +383,23 @@ void AudioStreamSpeex::reload() {
 
 	} while (packet_count <= extra_headers);
 
-	active = true;
+	active=true;
 
 }
 
-void AudioStreamSpeex::_bind_methods() {
+void AudioStreamPlaybackSpeex::_bind_methods() {
 
-	ObjectTypeDB::bind_method(_MD("set_file","file"),&AudioStreamSpeex::set_file);
-	ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamSpeex::get_file);
+	//ObjectTypeDB::bind_method(_MD("set_file","file"),&AudioStreamPlaybackSpeex::set_file);
+//	ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamPlaybackSpeex::get_file);
 
-	ObjectTypeDB::bind_method(_MD("_set_bundled"),&AudioStreamSpeex::_set_bundled);
-	ObjectTypeDB::bind_method(_MD("_get_bundled"),&AudioStreamSpeex::_get_bundled);
+	ObjectTypeDB::bind_method(_MD("_set_bundled"),&AudioStreamPlaybackSpeex::_set_bundled);
+	ObjectTypeDB::bind_method(_MD("_get_bundled"),&AudioStreamPlaybackSpeex::_get_bundled);
 
 	ADD_PROPERTY( PropertyInfo(Variant::DICTIONARY,"_bundled",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_BUNDLE),_SCS("_set_bundled"),_SCS("_get_bundled"));
-	ADD_PROPERTY( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"*.spx"),_SCS("set_file"),_SCS("get_file"));
+	//ADD_PROPERTY( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"*.spx"),_SCS("set_file"),_SCS("get_file"));
 };
 
-void AudioStreamSpeex::_set_bundled(const Dictionary& dict) {
+void AudioStreamPlaybackSpeex::_set_bundled(const Dictionary& dict) {
 
 	ERR_FAIL_COND( !dict.has("filename"));
 	ERR_FAIL_COND( !dict.has("data"));
@@ -399,7 +408,7 @@ void AudioStreamSpeex::_set_bundled(const Dictionary& dict) {
 	data = dict["data"];
 };
 
-Dictionary AudioStreamSpeex::_get_bundled() const {
+Dictionary AudioStreamPlaybackSpeex::_get_bundled() const {
 
 	Dictionary d;
 	d["filename"] = filename;
@@ -408,43 +417,17 @@ Dictionary AudioStreamSpeex::_get_bundled() const {
 };
 
 
-String AudioStreamSpeex::get_file() const {
-
-	return filename;
-};
-
-void AudioStreamSpeex::set_file(const String& p_file){
-
-	if (filename == p_file)
-		return;
-
-	if (active) {
-		unload();
-	}
-
-	if (p_file == "") {
-		data.resize(0);
-		return;
-	};
-
-	Error err;
-	FileAccess* file = FileAccess::open(p_file, FileAccess::READ,&err);
-	if (err != OK) {
-		data.resize(0);
-	};
-	ERR_FAIL_COND(err != OK);
 
-	filename = p_file;
-	data.resize(file->get_len());
-	int read = file->get_buffer(&data[0], data.size());
-	memdelete(file);
+void AudioStreamPlaybackSpeex::set_data(const Vector<uint8_t>& p_data) {
 
+	data=p_data;
 	reload();
 }
 
-void AudioStreamSpeex::play() {
 
-	_THREAD_SAFE_METHOD_
+void AudioStreamPlaybackSpeex::play(float p_from_pos) {
+
+
 
 	reload();
 	if (!active)
@@ -452,82 +435,103 @@ void AudioStreamSpeex::play() {
 	playing = true;
 
 }
-void AudioStreamSpeex::stop(){
+void AudioStreamPlaybackSpeex::stop(){
+
 
-	_THREAD_SAFE_METHOD_
 	unload();
 	playing = false;
-	_clear();
-}
-bool AudioStreamSpeex::is_playing() const{
 
-	return _is_ready() && (playing || (get_total() - get_todo() -1 > 0));
 }
+bool AudioStreamPlaybackSpeex::is_playing() const{
 
-void AudioStreamSpeex::set_paused(bool p_paused){
-
-	playing = !p_paused;
-	paused = p_paused;
+	return playing;
 }
-bool AudioStreamSpeex::is_paused(bool p_paused) const{
 
-	return paused;
-}
 
-void AudioStreamSpeex::set_loop(bool p_enable){
+void AudioStreamPlaybackSpeex::set_loop(bool p_enable){
 
 	loops = p_enable;
 }
-bool AudioStreamSpeex::has_loop() const{
+bool AudioStreamPlaybackSpeex::has_loop() const{
 
 	return loops;
 }
 
-float AudioStreamSpeex::get_length() const{
+float AudioStreamPlaybackSpeex::get_length() const{
 
 	return 0;
 }
 
-String AudioStreamSpeex::get_stream_name() const{
+String AudioStreamPlaybackSpeex::get_stream_name() const{
 
 	return "";
 }
 
-int AudioStreamSpeex::get_loop_count() const{
+int AudioStreamPlaybackSpeex::get_loop_count() const{
 
 	return 0;
 }
 
-float AudioStreamSpeex::get_pos() const{
+float AudioStreamPlaybackSpeex::get_pos() const{
 
 	return 0;
 }
-void AudioStreamSpeex::seek_pos(float p_time){
+void AudioStreamPlaybackSpeex::seek_pos(float p_time){
 
 
 };
 
-bool AudioStreamSpeex::_can_mix() const {
 
-	//return playing;
-	return data.size() != 0;
-};
 
+AudioStreamPlaybackSpeex::AudioStreamPlaybackSpeex() {
+
+	active=false;
+	st = NULL;
+	stream_channels=1;
+	stream_srate=1;
+	stream_minbuff_size=1;
+	playing=false;
 
-AudioStream::UpdateMode AudioStreamSpeex::get_update_mode() const {
 
-	return UPDATE_THREAD;
 }
 
-AudioStreamSpeex::AudioStreamSpeex() {
+AudioStreamPlaybackSpeex::~AudioStreamPlaybackSpeex() {
 
-	active=false;
-	st = NULL;
+	unload();
 }
 
-AudioStreamSpeex::~AudioStreamSpeex() {
 
-	unload();
+
+
+
+////////////////////////////////////////
+
+
+
+void AudioStreamSpeex::set_file(const String& p_file) {
+
+	if (this->file == p_file)
+		return;
+
+	this->file=p_file;
+
+	if (p_file == "") {
+		data.resize(0);
+		return;
+	};
+
+	Error err;
+	FileAccess* file = FileAccess::open(p_file, FileAccess::READ,&err);
+	if (err != OK) {
+		data.resize(0);
+	};
+	ERR_FAIL_COND(err != OK);
+
+	this->file = p_file;
+	data.resize(file->get_len());
+	int read = file->get_buffer(&data[0], data.size());
+	memdelete(file);
+
 }
 
 RES ResourceFormatLoaderAudioStreamSpeex::load(const String &p_path, const String& p_original_path, Error *r_error) {
diff --git a/drivers/speex/audio_stream_speex.h b/drivers/speex/audio_stream_speex.h
index f9e0fce666..f0617b302f 100644
--- a/drivers/speex/audio_stream_speex.h
+++ b/drivers/speex/audio_stream_speex.h
@@ -1,7 +1,7 @@
 #ifndef AUDIO_STREAM_SPEEX_H
 #define AUDIO_STREAM_SPEEX_H
 
-#include "scene/resources/audio_stream_resampled.h"
+#include "scene/resources/audio_stream.h"
 #include "speex/speex.h"
 #include "os/file_access.h"
 #include "io/resource_loader.h"
@@ -14,10 +14,10 @@
 
 #include <ogg/ogg.h>
 
-class AudioStreamSpeex : public AudioStreamResampled {
+class AudioStreamPlaybackSpeex : public AudioStreamPlayback {
+
+	OBJ_TYPE(AudioStreamPlaybackSpeex, AudioStreamPlayback);
 
-	OBJ_TYPE(AudioStreamSpeex, AudioStreamResampled);
-	_THREAD_SAFE_CLASS_
 
 	void *st;
 	SpeexBits bits;
@@ -29,7 +29,6 @@ class AudioStreamSpeex : public AudioStreamResampled {
 	bool loops;
 	int page_size;
 	bool playing;
-	bool paused;
 	bool packets_available;
 
 	void unload();
@@ -45,6 +44,9 @@ class AudioStreamSpeex : public AudioStreamResampled {
 
 	ogg_int64_t page_granule, last_granule;
 	int skip_samples, page_nb_packets;
+	int stream_channels;
+	int stream_srate;
+	int stream_minbuff_size;
 
 	void* process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers);
 
@@ -52,7 +54,7 @@ class AudioStreamSpeex : public AudioStreamResampled {
 
 protected:
 
-	virtual bool _can_mix() const;
+	//virtual bool _can_mix() const;
 
 	Dictionary _get_bundled() const;
 	void _set_bundled(const Dictionary& dict);
@@ -60,16 +62,12 @@ protected:
 public:
 
 
-	void set_file(const String& p_file);
-	String get_file() const;
+	void set_data(const Vector<uint8_t>& p_data);
 
-	virtual void play();
+	virtual void play(float p_from_pos=0);
 	virtual void stop();
 	virtual bool is_playing() const;
 
-	virtual void set_paused(bool p_paused);
-	virtual bool is_paused(bool p_paused) const;
-
 	virtual void set_loop(bool p_enable);
 	virtual bool has_loop() const;
 
@@ -82,13 +80,39 @@ public:
 	virtual float get_pos() const;
 	virtual void seek_pos(float p_time);
 
-	virtual UpdateMode get_update_mode() const;
-	virtual void update();
+	virtual int get_channels() const { return stream_channels; }
+	virtual int get_mix_rate() const { return stream_srate; }
+
+	virtual int get_minimum_buffer_size() const { return stream_minbuff_size; }
+	virtual int mix(int16_t* p_bufer,int p_frames);
+
+	virtual void set_loop_restart_time(float p_time) {  } //no loop restart, ignore
+
+	AudioStreamPlaybackSpeex();
+	~AudioStreamPlaybackSpeex();
+};
+
+
+
+class AudioStreamSpeex : public AudioStream {
+
+	OBJ_TYPE(AudioStreamSpeex,AudioStream);
+
+	Vector<uint8_t> data;
+	String file;
+public:
+
+	Ref<AudioStreamPlayback> instance_playback() {
+		Ref<AudioStreamPlaybackSpeex> pb = memnew( AudioStreamPlaybackSpeex );
+		pb->set_data(data);
+		return pb;
+	}
+
+	void set_file(const String& p_file);
 
-	AudioStreamSpeex();
-	~AudioStreamSpeex();
 };
 
+
 class ResourceFormatLoaderAudioStreamSpeex : public ResourceFormatLoader {
 public:
 	virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
diff --git a/drivers/speex/config.h b/drivers/speex/config.h
index d31382702c..8c48e3b99d 100644
--- a/drivers/speex/config.h
+++ b/drivers/speex/config.h
@@ -1,52 +1,52 @@
-/*
-   Copyright (C) 2003 Commonwealth Scientific and Industrial Research
-   Organisation (CSIRO) Australia
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-   - Neither the name of CSIRO Australia nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-   PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE ORGANISATION OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef CONFIG_H
-#define CONFIG_H
-
-/* An inline macro is required for use of the inline keyword as not all C compilers support */
-/* inline.  It is officially C99 and C++ only */
-
-
-/* Use only fixed point arithmetic */
-
-//#ifdef _MSC_VER
-//#define inline _inline
-//#endif
-
-#define FIXED_POINT 1
-
-#ifdef _MSC_VER
-#define inline __inline
-#endif
-
-#endif /* ! CONFIG_H */
+/*
+   Copyright (C) 2003 Commonwealth Scientific and Industrial Research
+   Organisation (CSIRO) Australia
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   - Neither the name of CSIRO Australia nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+   PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE ORGANISATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+/* An inline macro is required for use of the inline keyword as not all C compilers support */
+/* inline.  It is officially C99 and C++ only */
+
+
+/* Use only fixed point arithmetic */
+
+//#ifdef _MSC_VER
+//#define inline _inline
+//#endif
+
+#define FIXED_POINT 1
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+#endif /* ! CONFIG_H */
diff --git a/drivers/speex/lsp.h b/drivers/speex/lsp.h
index 648652fb9e..b55bd42f2c 100644
--- a/drivers/speex/lsp.h
+++ b/drivers/speex/lsp.h
@@ -1,64 +1,64 @@
-/*---------------------------------------------------------------------------*\
-Original Copyright
-	FILE........: AK2LSPD.H
-	TYPE........: Turbo C header file
-	COMPANY.....: Voicetronix
-	AUTHOR......: James Whitehall
-	DATE CREATED: 21/11/95
-
-Modified by Jean-Marc Valin
-
-    This file contains functions for converting Linear Prediction
-    Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the
-    LSP coefficients are not in radians format but in the x domain of the
-    unit circle.
-
-\*---------------------------------------------------------------------------*/
-/**
-   @file lsp.h
-   @brief Line Spectral Pair (LSP) functions.
-*/
-/* Speex License:
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-   
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-   
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-   
-   - Neither the name of the Xiph.org Foundation nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-   
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef __AK2LSPD__
-#define __AK2LSPD__
-
-#include "arch.h"
-
-int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack);
-void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack);
-
-/*Added by JMV*/
-void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin);
-
-void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes);
-
-#endif	/* __AK2LSPD__ */
+/*---------------------------------------------------------------------------*\
+Original Copyright
+	FILE........: AK2LSPD.H
+	TYPE........: Turbo C header file
+	COMPANY.....: Voicetronix
+	AUTHOR......: James Whitehall
+	DATE CREATED: 21/11/95
+
+Modified by Jean-Marc Valin
+
+    This file contains functions for converting Linear Prediction
+    Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the
+    LSP coefficients are not in radians format but in the x domain of the
+    unit circle.
+
+\*---------------------------------------------------------------------------*/
+/**
+   @file lsp.h
+   @brief Line Spectral Pair (LSP) functions.
+*/
+/* Speex License:
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+   
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __AK2LSPD__
+#define __AK2LSPD__
+
+#include "arch.h"
+
+int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack);
+void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack);
+
+/*Added by JMV*/
+void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin);
+
+void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes);
+
+#endif	/* __AK2LSPD__ */
diff --git a/drivers/speex/speex_bind.cpp b/drivers/speex/speex_bind.cpp
index 6e9eb638a2..d15bb3da8c 100644
--- a/drivers/speex/speex_bind.cpp
+++ b/drivers/speex/speex_bind.cpp
@@ -1,64 +1,64 @@
-
-#include "memory.h"
-#include "speex_bind.h"
-#include
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void *speex_alloc (int size) {
-
-	uint8_t * mem = (uint8_t*)memalloc(size);
-	for(int i=0;i<size;i++)
-		mem[i]=0;
-	return mem;
-}
-
-void *speex_alloc_scratch (int size) {
-
-	return memalloc(size);
-}
-
-void *speex_realloc (void *ptr, int size) {
-
-	return memrealloc(ptr,size);
-}
-
-void speex_free (void *ptr) {
-
-	memfree(ptr);
-}
-
-void speex_free_scratch (void *ptr) {
-
-	memfree(ptr);
-}
-
-void _speex_fatal(const char *str, const char *file, int line) {
-
-	_err_print_error("SPEEX ERROR",p_file,p_line,str);
-}
-
-void speex_warning(const char *str) {
-
-	_err_print_error("SPEEX WARNING","",0,str);
-}
-
-void speex_warning_int(const char *str, int val) {
-
-	_err_print_error("SPEEX WARNING INT","Value",val,str);
-}
-
-void speex_notify(const char *str) {
-
-	print_line(str);
-}
-
-void _speex_putc(int ch, void *file) {
-
-	// will not putc, no.
-}
-
-#ifdef __cplusplus
-}
-#endif
+
+#include "memory.h"
+#include "speex_bind.h"
+#include
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *speex_alloc (int size) {
+
+	uint8_t * mem = (uint8_t*)memalloc(size);
+	for(int i=0;i<size;i++)
+		mem[i]=0;
+	return mem;
+}
+
+void *speex_alloc_scratch (int size) {
+
+	return memalloc(size);
+}
+
+void *speex_realloc (void *ptr, int size) {
+
+	return memrealloc(ptr,size);
+}
+
+void speex_free (void *ptr) {
+
+	memfree(ptr);
+}
+
+void speex_free_scratch (void *ptr) {
+
+	memfree(ptr);
+}
+
+void _speex_fatal(const char *str, const char *file, int line) {
+
+	_err_print_error("SPEEX ERROR",p_file,p_line,str);
+}
+
+void speex_warning(const char *str) {
+
+	_err_print_error("SPEEX WARNING","",0,str);
+}
+
+void speex_warning_int(const char *str, int val) {
+
+	_err_print_error("SPEEX WARNING INT","Value",val,str);
+}
+
+void speex_notify(const char *str) {
+
+	print_line(str);
+}
+
+void _speex_putc(int ch, void *file) {
+
+	// will not putc, no.
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/drivers/speex/speex_bind.h b/drivers/speex/speex_bind.h
index e842960d3c..c928430a33 100644
--- a/drivers/speex/speex_bind.h
+++ b/drivers/speex/speex_bind.h
@@ -1,48 +1,48 @@
-#ifndef SPEEX_BIND_H
-#define SPEEX_BIND_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
-#define OVERRIDE_SPEEX_ALLOC
-#define OVERRIDE_SPEEX_ALLOC_SCRATCH
-#define OVERRIDE_SPEEX_REALLOC
-#define OVERRIDE_SPEEX_FREE
-#define OVERRIDE_SPEEX_FREE_SCRATCH
-#define OVERRIDE_SPEEX_FATAL
-#define OVERRIDE_SPEEX_WARNING
-#define OVERRIDE_SPEEX_WARNING_INT
-#define OVERRIDE_SPEEX_NOTIFY
-#define OVERRIDE_SPEEX_PUTC
-
-void *speex_alloc (int size);
-void *speex_alloc_scratch (int size);
-void *speex_realloc (void *ptr, int size);
-void speex_free (void *ptr);
-void speex_free_scratch (void *ptr);
-void _speex_fatal(const char *str, const char *file, int line);
-void speex_warning(const char *str);
-void speex_warning_int(const char *str, int val);
-void speex_notify(const char *str);
-void _speex_putc(int ch, void *file);
-
-
-*/
-#define RELEASE
-#define SPEEX_PI 3.14159265358979323846
-
-#ifdef _MSC_VER
-#define SPEEX_INLINE __inline
-#else
-#define SPEEX_INLINE inline
-#endif
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // SPEEX_BIND_H
+#ifndef SPEEX_BIND_H
+#define SPEEX_BIND_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+#define OVERRIDE_SPEEX_ALLOC
+#define OVERRIDE_SPEEX_ALLOC_SCRATCH
+#define OVERRIDE_SPEEX_REALLOC
+#define OVERRIDE_SPEEX_FREE
+#define OVERRIDE_SPEEX_FREE_SCRATCH
+#define OVERRIDE_SPEEX_FATAL
+#define OVERRIDE_SPEEX_WARNING
+#define OVERRIDE_SPEEX_WARNING_INT
+#define OVERRIDE_SPEEX_NOTIFY
+#define OVERRIDE_SPEEX_PUTC
+
+void *speex_alloc (int size);
+void *speex_alloc_scratch (int size);
+void *speex_realloc (void *ptr, int size);
+void speex_free (void *ptr);
+void speex_free_scratch (void *ptr);
+void _speex_fatal(const char *str, const char *file, int line);
+void speex_warning(const char *str);
+void speex_warning_int(const char *str, int val);
+void speex_notify(const char *str);
+void _speex_putc(int ch, void *file);
+
+
+*/
+#define RELEASE
+#define SPEEX_PI 3.14159265358979323846
+
+#ifdef _MSC_VER
+#define SPEEX_INLINE __inline
+#else
+#define SPEEX_INLINE inline
+#endif
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // SPEEX_BIND_H
diff --git a/drivers/theora/video_stream_theora.cpp b/drivers/theora/video_stream_theora.cpp
index 214185cf88..bea49e34b7 100644
--- a/drivers/theora/video_stream_theora.cpp
+++ b/drivers/theora/video_stream_theora.cpp
@@ -1,16 +1,12 @@
 #ifdef THEORA_ENABLED
-#if 0
+
 #include "video_stream_theora.h"
 #include "os/os.h"
 #include "yuv2rgb.h"
+#include "globals.h"
 
 
-AudioStream::UpdateMode VideoStreamTheora::get_update_mode() const {
-
-	return UPDATE_IDLE;
-};
-
-int VideoStreamTheora::	buffer_data() {
+int VideoStreamPlaybackTheora::	buffer_data() {
   char *buffer=ogg_sync_buffer(&oy,4096);
   int bytes=file->get_buffer((uint8_t*)buffer, 4096);
 
@@ -18,33 +14,13 @@ int VideoStreamTheora::	buffer_data() {
   return(bytes);
 }
 
-int VideoStreamTheora::queue_page(ogg_page *page){
+int VideoStreamPlaybackTheora::queue_page(ogg_page *page){
   if(theora_p)ogg_stream_pagein(&to,page);
   if(vorbis_p)ogg_stream_pagein(&vo,page);
   return 0;
 }
 
-Image VideoStreamTheora::peek_frame() const {
-
-	if (frames_pending == 0)
-		return Image();
-	return Image(size.x, size.y, 0, format, frame_data);
-};
-
-Image VideoStreamTheora::pop_frame() {
-
-	Image ret = peek_frame();
-	frames_pending = 0;
-
-	return ret;
-};
-
-int VideoStreamTheora::get_pending_frame_count() const {
-
-	return frames_pending;
-};
-
-void VideoStreamTheora::video_write(void){
+void VideoStreamPlaybackTheora::video_write(void){
 	th_ycbcr_buffer yuv;
 	int y_offset, uv_offset;
 	th_decode_ycbcr_out(td,yuv);
@@ -78,25 +54,31 @@ void VideoStreamTheora::video_write(void){
 
 	int pitch = 4;
 	frame_data.resize(size.x * size.y * pitch);
-	DVector<uint8_t>::Write w = frame_data.write();
-	char* dst = (char*)w.ptr();
+	{
+		DVector<uint8_t>::Write w = frame_data.write();
+		char* dst = (char*)w.ptr();
 
-	uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2);
+		uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2);
 
-	if (px_fmt == TH_PF_444) {
+		if (px_fmt == TH_PF_444) {
 
-		yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
+			yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
 
-	} else if (px_fmt == TH_PF_422) {
+		} else if (px_fmt == TH_PF_422) {
 
-		yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
+			yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
 
-	} else if (px_fmt == TH_PF_420) {
+		} else if (px_fmt == TH_PF_420) {
 
-		yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
-	};
+			yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0);
+		};
 
-	format = Image::FORMAT_RGBA;
+		format = Image::FORMAT_RGBA;
+	}
+
+	Image img(size.x,size.y,0,Image::FORMAT_RGBA,frame_data); //zero copy image creation
+
+	texture->set_data(img); //zero copy send to visual server
 
 	/*
 
@@ -194,7 +176,7 @@ void VideoStreamTheora::video_write(void){
 	frames_pending = 1;
 }
 
-void VideoStreamTheora::clear() {
+void VideoStreamPlaybackTheora::clear() {
 
 	if (file_name == "")
 		return;
@@ -218,7 +200,7 @@ void VideoStreamTheora::clear() {
 	}
 	ogg_sync_clear(&oy);
 
-	file_name = "";
+	//file_name = "";
 
 	theora_p = 0;
 	vorbis_p = 0;
@@ -229,7 +211,7 @@ void VideoStreamTheora::clear() {
 	playing = false;
 };
 
-void VideoStreamTheora::set_file(const String& p_file) {
+void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 
 	ogg_packet op;
 	th_setup_info    *ts = NULL;
@@ -241,7 +223,7 @@ void VideoStreamTheora::set_file(const String& p_file) {
 	file = FileAccess::open(p_file, FileAccess::READ);
 	ERR_FAIL_COND(!file);
 
-	audio_frames_wrote = 0;
+
 
 	ogg_sync_init(&oy);
 
@@ -256,6 +238,8 @@ void VideoStreamTheora::set_file(const String& p_file) {
 	/* Ogg file open; parse the headers */
 	/* Only interested in Vorbis/Theora streams */
 	int stateflag = 0;
+
+    int audio_track_skip=audio_track;
 	while(!stateflag){
 		int ret=buffer_data();
 		if(ret==0)break;
@@ -282,8 +266,14 @@ void VideoStreamTheora::set_file(const String& p_file) {
 				theora_p=1;
 			}else if(!vorbis_p && vorbis_synthesis_headerin(&vi,&vc,&op)>=0){
 				/* it is vorbis */
-				copymem(&vo,&test,sizeof(test));
-				vorbis_p=1;
+                if (audio_track_skip) {
+                    vorbis_info_clear(&vi);
+                    vorbis_comment_clear(&vc);
+                    audio_track_skip--;
+                } else {
+                    copymem(&vo,&test,sizeof(test));
+                    vorbis_p=1;
+                }
 			}else{
 				/* whatever it is, we don't care about it */
 				ogg_stream_clear(&test);
@@ -386,6 +376,8 @@ void VideoStreamTheora::set_file(const String& p_file) {
 		size.x = w;
 		size.y = h;
 
+		texture->create(w,h,Image::FORMAT_RGBA,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE);
+
 	}else{
 		/* tear down the partial theora setup */
 		th_info_clear(&ti);
@@ -399,7 +391,7 @@ void VideoStreamTheora::set_file(const String& p_file) {
 		vorbis_block_init(&vd,&vb);
 		fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n",
 				vo.serialno,vi.channels,vi.rate);
-		_setup(vi.channels, vi.rate);
+		//_setup(vi.channels, vi.rate);
 	}else{
 		/* tear down the partial vorbis setup */
 		vorbis_info_clear(&vi);
@@ -411,227 +403,299 @@ void VideoStreamTheora::set_file(const String& p_file) {
 	time=0;
 };
 
-float VideoStreamTheora::get_time() const {
+float VideoStreamPlaybackTheora::get_time() const {
 
 	//print_line("total: "+itos(get_total())+" todo: "+itos(get_todo()));
 	//return MAX(0,time-((get_total())/(float)vi.rate));
-	return time-((get_total())/(float)vi.rate);
+	return time-AudioServer::get_singleton()->get_output_delay()-delay_compensation;//-((get_total())/(float)vi.rate);
 };
 
-void VideoStreamTheora::update() {
+Ref<Texture> VideoStreamPlaybackTheora::get_texture() {
+
+	return texture;
+}
+
+void VideoStreamPlaybackTheora::update(float p_delta) {
 
 	if (!playing) {
 		//printf("not playing\n");
 		return;
 	};
 
-	double ctime =AudioServer::get_singleton()->get_mix_time();
+	//double ctime =AudioServer::get_singleton()->get_mix_time();
 
-	if (last_update_time) {
-		double delta = (ctime-last_update_time);
-		time+=delta;
-		//print_line("delta: "+rtos(delta));
-	}
-	last_update_time=ctime;
+	//print_line("play "+rtos(p_delta));
+	time+=p_delta;
 
+	if (videobuf_time>get_time())
+		return; //no new frames need to be produced
 
-	int audio_todo = get_todo();
-	ogg_packet op;
-	int audio_pending = 0;
+	bool frame_done=false;
 
+	while (!frame_done) {
+		//a frame needs to be produced
 
-	while (vorbis_p && audio_todo) {
-		int ret;
-		float **pcm;
-
-		/* if there's pending, decoded audio, grab it */
-		if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) {
-
-			audio_pending = ret;
-			int16_t* out = get_write_buffer();
-			int count = 0;
-			int to_read = MIN(ret, audio_todo);
-			for (int i=0; i<to_read; i++) {
-
-				for(int j=0;j<vi.channels;j++){
-					int val=Math::fast_ftoi(pcm[j][i]*32767.f);
-					if(val>32767)val=32767;
-					if(val<-32768)val=-32768;
-					out[count++] = val;
-				};
-			};
-			int tr = vorbis_synthesis_read(&vd, to_read);
-			audio_todo -= to_read;
-			audio_frames_wrote += to_read;
-			write(to_read);
-			audio_pending -= to_read;
-			if (audio_todo==0)
-				buffering=false;
+		ogg_packet op;
+		bool audio_pending = false;
 
 
-		} else {
+		while (vorbis_p) {
+			int ret;
+			float **pcm;
+
+			bool buffer_full=false;
+
+			/* if there's pending, decoded audio, grab it */
+			if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) {
+
+
+
+				const int AUXBUF_LEN=4096;
+				int to_read = ret;
+				int16_t aux_buffer[AUXBUF_LEN];
+
+				while(to_read) {
+
+					int m = MIN(AUXBUF_LEN/vi.channels,to_read);
+
+					int count = 0;
+
+					for(int j=0;j<m;j++){
+						for(int i=0;i<vi.channels;i++){
+
+							int val=Math::fast_ftoi(pcm[i][j]*32767.f);
+							if(val>32767)val=32767;
+							if(val<-32768)val=-32768;
+							aux_buffer[count++] = val;
+						}
+					}
+
+					if (mix_callback) {
+						int mixed = mix_callback(mix_udata,aux_buffer,m);
+						to_read-=mixed;
+						if (mixed!=m) { //could mix no more
+							buffer_full=true;
+							break;
+						}
+					} else {
+						to_read-=m; //just pretend we sent the audio
+					}
+
 
-			/* no pending audio; is there a pending packet to decode? */
-			if (ogg_stream_packetout(&vo,&op)>0){
-				if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */
-					vorbis_synthesis_blockin(&vd,&vb);
 				}
-			} else {  /* we need more data; break out to suck in another page */
-				//printf("need moar data\n");
+
+
+				int tr = vorbis_synthesis_read(&vd, ret-to_read);
+
+				audio_pending=true;
+
+
+			} else {
+
+				/* no pending audio; is there a pending packet to decode? */
+				if (ogg_stream_packetout(&vo,&op)>0){
+					if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */
+						vorbis_synthesis_blockin(&vd,&vb);
+					}
+				} else {  /* we need more data; break out to suck in another page */
+					//printf("need moar data\n");
+					break;
+				};
+			}
+
+			if (buffer_full)
 				break;
-			};
 		}
-	}
 
-	while(theora_p && !videobuf_ready){
-		/* theora is one in, one out... */
-		if(ogg_stream_packetout(&to,&op)>0){
+		while(theora_p && !frame_done){
+			/* theora is one in, one out... */
+			if(ogg_stream_packetout(&to,&op)>0){
 
 
-			if(pp_inc){
-				pp_level+=pp_inc;
-				th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,
-							  sizeof(pp_level));
-				pp_inc=0;
-			}
-			/*HACK: This should be set after a seek or a gap, but we might not have
-			a granulepos for the first packet (we only have them for the last
-			packet on a page), so we just set it as often as we get it.
-			To do this right, we should back-track from the last packet on the
-			page and compute the correct granulepos for the first packet after
-			a seek or a gap.*/
-			if(op.granulepos>=0){
-				th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos,
-							  sizeof(op.granulepos));
-			}
-			ogg_int64_t videobuf_granulepos;
-			if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){
-				videobuf_time=th_granule_time(td,videobuf_granulepos);
-				//printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
-
-				/* is it already too old to be useful?  This is only actually
-				 useful cosmetically after a SIGSTOP.  Note that we have to
-				 decode the frame even if we don't show it (for now) due to
-				 keyframing.  Soon enough libtheora will be able to deal
-				 with non-keyframe seeks.  */
-
-				if(videobuf_time>=get_time())
-					videobuf_ready=1;
-				else{
-					/*If we are too slow, reduce the pp level.*/
-					pp_inc=pp_level>0?-1:0;
+				if(pp_inc){
+					pp_level+=pp_inc;
+					th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,
+								  sizeof(pp_level));
+					pp_inc=0;
+				}
+				/*HACK: This should be set after a seek or a gap, but we might not have
+				a granulepos for the first packet (we only have them for the last
+				packet on a page), so we just set it as often as we get it.
+				To do this right, we should back-track from the last packet on the
+				page and compute the correct granulepos for the first packet after
+				a seek or a gap.*/
+				if(op.granulepos>=0){
+					th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos,
+								  sizeof(op.granulepos));
+				}
+				ogg_int64_t videobuf_granulepos;
+				if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){
+					videobuf_time=th_granule_time(td,videobuf_granulepos);
+
+					//printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
+
+					/* is it already too old to be useful?  This is only actually
+					 useful cosmetically after a SIGSTOP.  Note that we have to
+					 decode the frame even if we don't show it (for now) due to
+					 keyframing.  Soon enough libtheora will be able to deal
+					 with non-keyframe seeks.  */
+
+					if(videobuf_time>=get_time())
+						frame_done=true;
+					else{
+						/*If we are too slow, reduce the pp level.*/
+						pp_inc=pp_level>0?-1:0;
+					}
 				}
-			}
-
-		} else
-			break;
-	}
 
-	if (/*!videobuf_ready && */ audio_pending == 0 && file->eof_reached()) {
-		printf("video done, stopping\n");
-		stop();
-		return;
-	};
+			} else
+				break;
+		}
 
-	if (!videobuf_ready || audio_todo > 0){
-		/* no data yet for somebody.  Grab another page */
+		if (file && /*!videobuf_ready && */ file->eof_reached()) {
+			printf("video done, stopping\n");
+			stop();
+			return;
+		};
+	#if 0
+		if (!videobuf_ready || audio_todo > 0){
+			/* no data yet for somebody.  Grab another page */
 
-		buffer_data();
-		while(ogg_sync_pageout(&oy,&og)>0){
-			queue_page(&og);
+			buffer_data();
+			while(ogg_sync_pageout(&oy,&og)>0){
+				queue_page(&og);
+			}
 		}
-	}
+	#else
+		if (!frame_done){
+			//what's the point of waiting for audio to grab a page?
 
-	/* If playback has begun, top audio buffer off immediately. */
-	//if(stateflag) audio_write_nonblocking();
+			buffer_data();
+			while(ogg_sync_pageout(&oy,&og)>0){
+				queue_page(&og);
+			}
+		}
+	#endif
+		/* If playback has begun, top audio buffer off immediately. */
+		//if(stateflag) audio_write_nonblocking();
 
-	/* are we at or past time for this video frame? */
-	if(videobuf_ready && videobuf_time<=get_time()){
+		/* are we at or past time for this video frame? */
+		if(videobuf_ready && videobuf_time<=get_time()){
 
-		video_write();
-		videobuf_ready=0;
-	} else {
-		//printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
-	}
+			//video_write();
+			//videobuf_ready=0;
+		} else {
+			//printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready);
+		}
 
-	float tdiff=videobuf_time-get_time();
-	/*If we have lots of extra time, increase the post-processing level.*/
-	if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){
-		pp_inc=pp_level<pp_level_max?1:0;
-	}
-	else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){
-		pp_inc=pp_level>0?-1:0;
+		float tdiff=videobuf_time-get_time();
+		/*If we have lots of extra time, increase the post-processing level.*/
+		if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){
+			pp_inc=pp_level<pp_level_max?1:0;
+		}
+		else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){
+			pp_inc=pp_level>0?-1:0;
+		}
 	}
-};
 
-bool VideoStreamTheora::_can_mix() const {
+	video_write();
 
-	return !buffering;
 };
 
-void VideoStreamTheora::play() {
+
+void VideoStreamPlaybackTheora::play() {
 
 	if (!playing)
-		last_update_time=0;
+		time=0;
 	playing = true;
+	delay_compensation=Globals::get_singleton()->get("audio/video_delay_compensation_ms");
+	delay_compensation/=1000.0;
+
 };
 
-void VideoStreamTheora::stop() {
+void VideoStreamPlaybackTheora::stop() {
 
+	if (playing) {
+		clear();
+		set_file(file_name); //reset
+	}
 	playing = false;
-	last_update_time=0;
+	time=0;
 };
 
-bool VideoStreamTheora::is_playing() const {
+bool VideoStreamPlaybackTheora::is_playing() const {
 
 	return playing;
 };
 
-void VideoStreamTheora::set_paused(bool p_paused) {
+void VideoStreamPlaybackTheora::set_paused(bool p_paused) {
 
 	playing = !p_paused;
 };
 
-bool VideoStreamTheora::is_paused(bool p_paused) const {
+bool VideoStreamPlaybackTheora::is_paused(bool p_paused) const {
 
 	return playing;
 };
 
-void VideoStreamTheora::set_loop(bool p_enable) {
+void VideoStreamPlaybackTheora::set_loop(bool p_enable) {
 
 };
 
-bool VideoStreamTheora::has_loop() const {
+bool VideoStreamPlaybackTheora::has_loop() const {
 
 	return false;
 };
 
-float VideoStreamTheora::get_length() const {
+float VideoStreamPlaybackTheora::get_length() const {
 
 	return 0;
 };
 
-String VideoStreamTheora::get_stream_name() const {
+String VideoStreamPlaybackTheora::get_stream_name() const {
 
 	return "";
 };
 
-int VideoStreamTheora::get_loop_count() const {
+int VideoStreamPlaybackTheora::get_loop_count() const {
 
 	return 0;
 };
 
-float VideoStreamTheora::get_pos() const {
+float VideoStreamPlaybackTheora::get_pos() const {
 
 	return get_time();
 };
 
-void VideoStreamTheora::seek_pos(float p_time) {
+void VideoStreamPlaybackTheora::seek_pos(float p_time) {
 
 	// no
 };
 
-VideoStreamTheora::VideoStreamTheora() {
+void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback,void *p_userdata) {
+
+	mix_callback=p_callback;
+	mix_udata=p_userdata;
+}
+
+int VideoStreamPlaybackTheora::get_channels() const{
+
+	return vi.channels;
+}
+
+void VideoStreamPlaybackTheora::set_audio_track(int p_idx) {
+
+    audio_track=p_idx;
+}
+
+int VideoStreamPlaybackTheora::get_mix_rate() const{
+
+	return vi.rate;
+}
+
+
+
+VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() {
 
 	file = NULL;
 	theora_p = 0;
@@ -640,11 +704,16 @@ VideoStreamTheora::VideoStreamTheora() {
 	playing = false;
 	frames_pending = 0;
 	videobuf_time = 0;
-	last_update_time =0;
+
 	buffering=false;
+	texture = Ref<ImageTexture>( memnew(ImageTexture ));
+	mix_callback=NULL;
+	mix_udata=NULL;
+    audio_track=0;
+	delay_compensation=0;
 };
 
-VideoStreamTheora::~VideoStreamTheora() {
+VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() {
 
 	clear();
 
@@ -653,10 +722,16 @@ VideoStreamTheora::~VideoStreamTheora() {
 };
 
 
-RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path) {
+RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path, Error *r_error) {
+	if (r_error)
+		*r_error=ERR_FILE_CANT_OPEN;
 
 	VideoStreamTheora *stream = memnew(VideoStreamTheora);
 	stream->set_file(p_path);
+
+	if (r_error)
+		*r_error=OK;
+
 	return Ref<VideoStreamTheora>(stream);
 }
 
@@ -666,16 +741,16 @@ void ResourceFormatLoaderVideoStreamTheora::get_recognized_extensions(List<Strin
 	p_extensions->push_back("ogv");
 }
 bool ResourceFormatLoaderVideoStreamTheora::handles_type(const String& p_type) const {
-	return (p_type=="AudioStream" || p_type=="VideoStreamTheora");
+	return (p_type=="VideoStream" || p_type=="VideoStreamTheora");
 }
 
 String ResourceFormatLoaderVideoStreamTheora::get_resource_type(const String &p_path) const {
 
 	String exl=p_path.extension().to_lower();
 	if (exl=="ogm" || exl=="ogv")
-		return "AudioStreamTheora";
+		return "VideoStreamTheora";
 	return "";
 }
 
 #endif
-#endif
+
diff --git a/drivers/theora/video_stream_theora.h b/drivers/theora/video_stream_theora.h
index 12aac731fc..95c7fe88f6 100644
--- a/drivers/theora/video_stream_theora.h
+++ b/drivers/theora/video_stream_theora.h
@@ -10,9 +10,9 @@
 #include "io/resource_loader.h"
 #include "scene/resources/video_stream.h"
 
-class VideoStreamTheora : public VideoStream {
+class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 
-	OBJ_TYPE(VideoStreamTheora, VideoStream);
+	OBJ_TYPE(VideoStreamPlaybackTheora, VideoStreamPlayback);
 
 	enum {
 		MAX_FRAMES = 4,
@@ -58,16 +58,19 @@ class VideoStreamTheora : public VideoStream {
 
 	double last_update_time;
 	double time;
+	double delay_compensation;
 
-protected:
+	Ref<ImageTexture> texture;
 
-	virtual UpdateMode get_update_mode() const;
-	virtual void update();
+	AudioMixCallback mix_callback;
+	void* mix_udata;
 
-	void clear();
+    int audio_track;
 
-	virtual bool _can_mix() const;
+protected:
 
+	void clear();
+	
 public:
 
 	virtual void play();
@@ -92,12 +95,43 @@ public:
 
 	void set_file(const String& p_file);
 
-	int get_pending_frame_count() const;
-	Image pop_frame();
-	Image peek_frame() const;
+	virtual Ref<Texture> get_texture();
+	virtual void update(float p_delta);
+
+	virtual void set_mix_callback(AudioMixCallback p_callback,void *p_userdata);
+	virtual int get_channels() const;
+	virtual int get_mix_rate() const;
+
+	virtual void set_audio_track(int p_idx);
+
+	VideoStreamPlaybackTheora();
+	~VideoStreamPlaybackTheora();
+};
+
+
+
+class VideoStreamTheora : public VideoStream {
+
+	OBJ_TYPE(VideoStreamTheora,VideoStream);
+
+	String file;
+    int audio_track;
+
+
+public:
+
+	Ref<VideoStreamPlayback> instance_playback() {
+		Ref<VideoStreamPlaybackTheora> pb = memnew( VideoStreamPlaybackTheora );
+        pb->set_audio_track(audio_track);
+		pb->set_file(file);
+		return pb;
+	}
+
+	void set_file(const String& p_file) { file=p_file; }
+    void set_audio_track(int p_track) { audio_track=p_track; }
+
+    VideoStreamTheora() { audio_track=0; }
 
-	VideoStreamTheora();
-	~VideoStreamTheora();
 };
 
 class ResourceFormatLoaderVideoStreamTheora : public ResourceFormatLoader {
diff --git a/drivers/theoraplayer/SCsub b/drivers/theoraplayer/SCsub
deleted file mode 100644
index 09fb13d8e9..0000000000
--- a/drivers/theoraplayer/SCsub
+++ /dev/null
@@ -1,106 +0,0 @@
-Import("env")
-
-import string
-
-sources = string.split("""
-src/TheoraVideoClip.cpp
-src/FFmpeg/TheoraVideoClip_FFmpeg.cpp
-src/TheoraAsync.cpp
-src/TheoraAudioInterface.cpp
-src/TheoraException.cpp
-src/TheoraWorkerThread.cpp
-src/TheoraVideoManager.cpp
-src/TheoraTimer.cpp
-src/TheoraUtil.cpp
-src/TheoraDataSource.cpp
-src/TheoraAudioPacketQueue.cpp
-src/TheoraFrameQueue.cpp
-src/Theora/TheoraVideoClip_Theora.cpp
-src/YUV/yuv_util.c
-src/YUV/libyuv/src/row_any.cc
-src/YUV/libyuv/src/compare_common.cc
-src/YUV/libyuv/src/scale_neon.cc
-src/YUV/libyuv/src/planar_functions.cc
-src/YUV/libyuv/src/compare.cc
-src/YUV/libyuv/src/scale_mips.cc
-src/YUV/libyuv/src/scale_posix.cc
-src/YUV/libyuv/src/row_posix.cc
-src/YUV/libyuv/src/row_win.cc
-src/YUV/libyuv/src/compare_neon.cc
-src/YUV/libyuv/src/convert_from_argb.cc
-src/YUV/libyuv/src/mjpeg_validate.cc
-src/YUV/libyuv/src/convert_from.cc
-src/YUV/libyuv/src/rotate_neon.cc
-src/YUV/libyuv/src/row_neon.cc
-src/YUV/libyuv/src/rotate_mips.cc
-src/YUV/libyuv/src/compare_posix.cc
-src/YUV/libyuv/src/row_mips.cc
-src/YUV/libyuv/src/scale.cc
-src/YUV/libyuv/src/scale_argb.cc
-src/YUV/libyuv/src/mjpeg_decoder.cc
-src/YUV/libyuv/src/scale_win.cc
-src/YUV/libyuv/src/scale_common.cc
-src/YUV/libyuv/src/scale_argb_neon.cc
-src/YUV/libyuv/src/row_common.cc
-src/YUV/libyuv/src/convert.cc
-src/YUV/libyuv/src/format_conversion.cc
-src/YUV/libyuv/src/rotate_argb.cc
-src/YUV/libyuv/src/rotate.cc
-src/YUV/libyuv/src/convert_argb.cc
-src/YUV/libyuv/src/cpu_id.cc
-src/YUV/libyuv/src/video_common.cc
-src/YUV/libyuv/src/convert_to_argb.cc
-src/YUV/libyuv/src/compare_win.cc
-src/YUV/libyuv/src/convert_to_i420.cc
-src/YUV/libyuv/src/convert_jpeg.cc
-src/YUV/libyuv/yuv_libyuv.c
-src/YUV/android/cpu-features.c
-src/YUV/C/yuv420_grey_c.c
-src/YUV/C/yuv420_yuv_c.c
-src/YUV/C/yuv420_rgb_c.c
-src/TheoraVideoFrame.cpp
-""")
-
-env_theora = env.Clone()
-
-if env["platform"] == "iphone":
-	sources.append("src/AVFoundation/TheoraVideoClip_AVFoundation.mm")
-	env.Append(LINKFLAGS=['-framework', 'CoreVideo', '-framework', 'CoreMedia', '-framework', 'AVFoundation'])
-	if env["target"] == "release":
-		env_theora.Append(CPPFLAGS=["-D_IOS", "-D__ARM_NEON__", "-fstrict-aliasing", "-fmessage-length=210", "-fdiagnostics-show-note-include-stack", "-fmacro-backtrace-limit=0", "-fcolor-diagnostics", "-Wno-trigraphs", "-fpascal-strings", "-fvisibility=hidden", "-fvisibility-inlines-hidden"])
-
-env_theora.Append(CPPFLAGS=["-D_LIB", "-D__THEORA"]) # removed -D_YUV_C
-env_theora.Append(CPPFLAGS=["-D_YUV_LIBYUV"])
-#env_theora.Append(CPPFLAGS=["-D_YUV_C"])
-
-if env["platform"] == "iphone":
-	env_theora.Append(CPPFLAGS=["-D__AVFOUNDATION"])
-else:
-	pass
-	#env_theora.Append(CPPFLAGS=["-D__FFMPEG"])
-
-if env["platform"] == "android":
-	env_theora.Append(CPPFLAGS=["-D_ANDROID"])
-
-if env["platform"] == "winrt":
-		env_theora.Append(CPPFLAGS=["-D_WINRT"])
-
-env_theora.Append(CPPPATH=["#drivers/theoraplayer/include/theoraplayer", "#drivers/theoraplayer/src/YUV", "#drivers/theoraplayer/src/YUV/libyuv/include", "#drivers/theoraplayer/src/Theora", "#drivers/theoraplayer/src/AVFoundation"])
-
-objs = []
-
-env_theora.add_source_files(objs, ["video_stream_theoraplayer.cpp"])
-
-if env['use_theoraplayer_binary'] == "yes":
-	if env["platform"] == "iphone":
-		env.Append(LIBPATH=['#drivers/theoraplayer/lib/ios'])
-		env.Append(LIBS=['theoraplayer', 'ogg', 'theora', 'tremor'])
-	if env["platform"] == "windows":
-		env.Append(LIBPATH=['#drivers/theoraplayer/lib/windows'])
-		env.Append(LINKFLAGS=['libtheoraplayer_static.lib', 'libogg.lib', 'libtheora.lib', 'libvorbis.lib'])
-else:
-	env_theora.add_source_files(objs, sources)
-
-env.drivers_sources += objs
-
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h b/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h
deleted file mode 100644
index 7f1b49b9af..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraAsync_h
-#define _TheoraAsync_h
-
-#ifndef _WIN32
-#include <pthread.h>
-#endif
-
-/// @note Based on hltypes::Thread
-class TheoraMutex
-{
-public:
-	TheoraMutex();
-	~TheoraMutex();
-	void lock();
-	void unlock();
-		
-protected:
-	void* mHandle;
-		
-};
-
-/// @note Based on hltypes::Thread
-class TheoraThread
-{
-	TheoraMutex mRunningMutex;
-public:
-	TheoraThread();
-	virtual ~TheoraThread();
-	void start();
-	void stop();
-	void resume();
-	void pause();
-	bool isRunning();
-	virtual void execute() = 0;
-	void join();
-		
-protected:
-	void* mId;
-	volatile bool mRunning;
-		
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h b/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h
deleted file mode 100644
index aa03293806..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraAudioInterface_h
-#define _TheoraAudioInterface_h
-
-#include "TheoraExport.h"
-
-class TheoraVideoClip;
-
-
-/**
- This is the class that serves as an interface between the library's audio
- output and the audio playback library of your choice.
- The class gets mono or stereo PCM data in in floating point data
- */
-class TheoraPlayerExport TheoraAudioInterface
-{
-public:
-	//! PCM frequency, usualy 44100 Hz
-	int mFreq;
-	//! Mono or stereo
-	int mNumChannels;
-	//! Pointer to the parent TheoraVideoClip object
-	TheoraVideoClip* mClip;
-	
-	TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq);
-	virtual ~TheoraAudioInterface();
-    //! A function that the TheoraVideoClip object calls once more audio packets are decoded
-    /*!
-	 \param data contains one or two channels of float PCM data in the range [-1,1]
-	 \param nSamples contains the number of samples that the data parameter contains in each channel
-	 */
-	virtual void insertData(float* data, int nSamples)=0;	
-};
-
-class TheoraPlayerExport TheoraAudioInterfaceFactory
-{
-public:
-	//! VideoManager calls this when creating a new TheoraVideoClip object
-	virtual TheoraAudioInterface* createInstance(TheoraVideoClip* owner, int nChannels, int freq) = 0;
-};
-
-
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h b/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h
deleted file mode 100644
index e0d17516e6..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraAudioPacketQueue_h
-#define _TheoraAudioPacketQueue_h
-
-#include "TheoraExport.h"
-
-class TheoraAudioInterface;
-/**
- This is an internal structure which TheoraVideoClip_Theora uses to store audio packets
- */
-struct TheoraAudioPacket
-{
-	float* pcm;
-	int numSamples; //! size in number of float samples (stereo has twice the number of samples)
-	TheoraAudioPacket* next; // pointer to the next audio packet, to implement a linked list
-};
-
-/**
-    This is a Mutex object, used in thread syncronization.
- */
-class TheoraPlayerExport TheoraAudioPacketQueue
-{
-protected:
-	unsigned int mAudioFrequency, mNumAudioChannels;
-	TheoraAudioPacket* mTheoraAudioPacketQueue;
-	void _addAudioPacket(float* data, int numSamples);
-public:
-	TheoraAudioPacketQueue();
-	~TheoraAudioPacketQueue();
-	
-	float getAudioPacketQueueLength();
-	void addAudioPacket(float** buffer, int numSamples, float gain);
-	void addAudioPacket(float* buffer, int numSamples, float gain);
-	TheoraAudioPacket* popAudioPacket();
-	void destroyAudioPacket(TheoraAudioPacket* p);
-	void destroyAllAudioPackets();
-	
-	void flushAudioPackets(TheoraAudioInterface* audioInterface);
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h b/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h
deleted file mode 100644
index b7427e97d3..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraDataSource_h
-#define _TheoraDataSource_h
-
-#include <stdio.h>
-#include <string>
-#include "TheoraExport.h"
-
-/**
-	This is a simple class that provides abstracted data feeding. You can use the
-	TheoraFileDataSource for regular file playback or you can implement your own
-	internet streaming solution, or a class that uses encrypted datafiles etc.
-	The sky is the limit
-*/
-class TheoraPlayerExport TheoraDataSource
-{
-public:
-
-    virtual ~TheoraDataSource();
-	/**
-		Reads nBytes bytes from data source and returns number of read bytes.
-		if function returns less bytes then nBytes, the system assumes EOF is reached.
-	*/
-	virtual int read(void* output,int nBytes)=0;
-    //! returns a string representation of the DataSource, eg 'File: source.ogg'
-	virtual std::string repr()=0;
-	//! position the source pointer to byte_index from the start of the source
-	virtual void seek(unsigned long byte_index)=0;
-	//! return the size of the stream in bytes
-	virtual unsigned long size()=0;
-	//! return the current position of the source pointer
-	virtual unsigned long tell()=0;
-};
-
-
-/**
-	provides standard file IO
-*/
-class TheoraPlayerExport TheoraFileDataSource : public TheoraDataSource
-{
-	FILE* mFilePtr;
-	std::string mFilename;
-	unsigned long mSize;
-	
-	void openFile();
-public:
-	TheoraFileDataSource(std::string filename);
-	~TheoraFileDataSource();
-
-	int read(void* output,int nBytes);
-	void seek(unsigned long byte_index);
-	std::string repr() { return mFilename; }
-	unsigned long size();
-	unsigned long tell();
-	
-	std::string getFilename() { return mFilename; }
-};
-
-/**
-	Pre-loads the entire file and streams from memory.
-	Very useful if you're continuously displaying a video and want to avoid disk reads.
-	Not very practical for large files.
-*/
-class TheoraPlayerExport TheoraMemoryFileDataSource : public TheoraDataSource
-{
-	std::string mFilename;
-	unsigned long mSize, mReadPointer;
-	unsigned char* mData;
-public:
-	TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename = "memory");
-	TheoraMemoryFileDataSource(std::string filename);
-	~TheoraMemoryFileDataSource();
-
-	int read(void* output,int nBytes);
-	void seek(unsigned long byte_index);
-	std::string repr() { return "MEM:"+mFilename; }
-	unsigned long size();
-	unsigned long tell();
-	std::string getFilename() { return mFilename; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraException.h b/drivers/theoraplayer/include/theoraplayer/TheoraException.h
deleted file mode 100644
index f79368fa1e..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraException.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef EXCEPTION_H
-#define EXCEPTION_H
-
-#include <string>
-#include "TheoraExport.h"
-
-class TheoraPlayerExport _TheoraGenericException
-{
-public:
-    std::string mErrText,mFile,mType;
-	int mLineNumber;
-
-	_TheoraGenericException(const std::string& errorText, std::string type = "",std::string file = "", int line = 0);
-    virtual ~_TheoraGenericException() {}
-    
-	virtual std::string repr();
-    
-	void writeOutput();
-	
-	virtual const std::string& getErrorText() { return mErrText; }
-    
-	const std::string getType(){ return mType; }
-};
-
-#define TheoraGenericException(msg) _TheoraGenericException(msg, "TheoraGenericException", __FILE__, __LINE__)
-
-
-#define exception_cls(name) class name : public _TheoraGenericException \
-{ \
-public: \
-	name(const std::string& errorText,std::string type = "",std::string file = "",int line = 0) : \
-	  _TheoraGenericException(errorText, type, file, line){} \
-}
-
-exception_cls(_KeyException);
-
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraExport.h b/drivers/theoraplayer/include/theoraplayer/TheoraExport.h
deleted file mode 100644
index cf16d1004c..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraExport.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _theoraVideoExport_h
-#define _theoraVideoExport_h
-
-	#ifdef _LIB
-		#define TheoraPlayerExport
-		#define TheoraPlayerFnExport
-	#else
-		#ifdef _WIN32
-			#ifdef THEORAVIDEO_EXPORTS
-				#define TheoraPlayerExport __declspec(dllexport)
-				#define TheoraPlayerFnExport __declspec(dllexport)
-			#else
-				#define TheoraPlayerExport __declspec(dllimport)
-				#define TheoraPlayerFnExport __declspec(dllimport)
-			#endif
-		#else
-			#define TheoraPlayerExport __attribute__ ((visibility("default")))
-			#define TheoraPlayerFnExport __attribute__ ((visibility("default")))
-		#endif
-	#endif
-	#ifndef DEPRECATED_ATTRIBUTE
-		#ifdef _MSC_VER
-			#define DEPRECATED_ATTRIBUTE __declspec(deprecated("function is deprecated"))
-		#else
-			#define DEPRECATED_ATTRIBUTE __attribute__((deprecated))
-		#endif
-	#endif
-
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h b/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h
deleted file mode 100644
index fd985bb65a..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraFrameQueue_h
-#define _TheoraFrameQueue_h
-
-#include "TheoraAsync.h"
-#include <list>
-#include "TheoraExport.h"
-
-class TheoraVideoFrame;
-class TheoraVideoClip;
-
-/**
-	This class handles the frame queue. contains frames and handles their alloctation/deallocation
-	it is designed to be thread-safe
-*/
-class TheoraPlayerExport TheoraFrameQueue
-{
-protected:
-	std::list<TheoraVideoFrame*> mQueue;
-	TheoraVideoClip* mParent;
-	TheoraMutex mMutex;
-	
-	//! implementation function that returns a TheoraVideoFrame instance
-	TheoraVideoFrame* createFrameInstance(TheoraVideoClip* clip);
-public:
-	TheoraFrameQueue(TheoraVideoClip* parent);
-	~TheoraFrameQueue();
-
-	/**
-	    \brief Returns the first available frame in the queue or NULL if no frames are available.
-
-		This function DOES NOT remove the frame from the queue, you have to do it manually
-		when you want to mark the frame as used by calling the pop() function.
-	*/
-	TheoraVideoFrame* getFirstAvailableFrame();
-    //! non-mutex version
-	TheoraVideoFrame* _getFirstAvailableFrame();
-
-	//! return the number of used (not ready) frames
-	int getUsedCount();
-
-	//! return the number of ready frames
-	int getReadyCount();
-    //! non-mutex version
-	int _getReadyCount();
-
-	/**
-	    \brief remove the first N available frame from the queue.
-
-	    Use this every time you display a frame	so you can get the next one when the time comes.
-		This function marks the frame on the front of the queue as unused and it's memory then
-		get's used again in the decoding process.
-		If you don't call this, the frame queue will fill up with precached frames up to the
-		specified amount in the TheoraVideoManager class and you won't be able to advance the video.
-	*/
-	void pop(int n = 1);
-    
-    //! This is an internal _pop function. use externally only in combination with lock() / unlock() calls
-	void _pop(int n);
-
-	//! frees all decoded frames for reuse (does not destroy memory, just marks them as free)
-	void clear();
-	//! Called by WorkerThreads when they need to unload frame data, do not call directly!
-	TheoraVideoFrame* requestEmptyFrame();
-
-	/** 
-	    \brief set's the size of the frame queue.
-		
-		Beware, currently stored ready frames will be lost upon this call
-	*/
-	void setSize(int n);
-	//! return the size of the queue
-	int getSize();
-	
-	//! return whether all frames in the queue are ready for display
-	bool isFull();
-	
-	//! lock the queue's mutex manually
-	void lock();
-	//! unlock the queue's mutex manually
-	void unlock();
-    
-    //! returns the internal frame queue. Warning: Always lock / unlock queue's mutex before accessing frames directly!
-    std::list<TheoraVideoFrame*>& _getFrameQueue();
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h b/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h
deleted file mode 100644
index 73d853cd03..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraPixelTransform_h
-#define _TheoraPixelTransform_h
-
-struct TheoraPixelTransform
-{
-	unsigned char *raw, *y, *u, *v, *out;
-	unsigned int w, h, rawStride, yStride, uStride, vStride;
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h b/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h
deleted file mode 100644
index 8c5f2c735c..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraPlayer_h
-#define _TheoraPlayer_h
-
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip.h"
-#include "TheoraVideoFrame.h"
-
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h b/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h
deleted file mode 100644
index 14fdbf47fc..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraTimer_h
-#define _TheoraTimer_h
-
-#include "TheoraExport.h"
-
-/**
-    This is a Timer object, it is used to control the playback of a TheoraVideoClip.
-
-	You can inherit this class and make a timer that eg. plays twice as fast,
-	or playbacks an audio track and uses it's time offset for syncronizing Video etc.
- */
-class TheoraPlayerExport TheoraTimer
-{
-protected:
-	//! Current time in seconds
-	float mTime,mSpeed;
-	//! Is the timer paused or not
-	bool mPaused;
-public:
-	TheoraTimer();
-	virtual ~TheoraTimer();
-
-	virtual float getTime();
-	/**
-	    \brief advance the time.
-
-		If you're using another synronization system, eg. an audio track,
-		then you can ignore this call or use it to perform other updates.
-
-		NOTE: this is called by TheoraVideoManager from the main thread
-	 */
-	virtual void update(float timeDelta);
-
-	virtual void pause();
-	virtual void play();
-	virtual bool isPaused();
-	virtual void stop();
-	/**
-	    \brief set's playback speed
-
-        1.0 is the default. The speed factor multiplies time advance, thus
-        setting the value higher will increase playback speed etc.
-    
-        NOTE: depending on Timer implementation, it may not support setting the speed
-         
-	 */
-    virtual void setSpeed(float speed);
-    //! return the update speed 1.0 is the default
-    virtual float getSpeed();
-
-	/**
-	    \brief change the current time.
-
-		if you're using another syncronization mechanism, make sure to adjust
-		the time offset there
-	 */
-	virtual void seek(float time);
-};
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h b/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h
deleted file mode 100644
index f168971ac7..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraUtil_h
-#define _TheoraUtil_h
-
-#include <string>
-#include <vector>
-
-#ifndef THEORAUTIL_NOMACROS
-
-#define foreach(type,lst) for (std::vector<type>::iterator it=lst.begin();it != lst.end(); ++it)
-#define foreach_l(type,lst) for (std::list<type>::iterator it=lst.begin();it != lst.end(); ++it)
-#define foreach_r(type,lst) for (std::vector<type>::reverse_iterator it=lst.rbegin();it != lst.rend(); ++it)
-#define foreach_in_map(type,lst) for (std::map<std::string,type>::iterator it=lst.begin();it != lst.end(); ++it)
-
-#endif
-
-#define th_writelog(x) TheoraVideoManager::getSingleton().logMessage(x)
-
-
-std::string str(int i);
-std::string strf(float i);
-void _psleep(int milliseconds);
-int _nextPow2(int x);
-
-#endif
-\ No newline at end of file
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h
deleted file mode 100644
index fe71cf8566..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h
+++ /dev/null
@@ -1,282 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraVideoClip_h
-#define _TheoraVideoClip_h
-
-#include <string>
-#include "TheoraExport.h"
-
-// forward class declarations
-class TheoraMutex;
-class TheoraFrameQueue;
-class TheoraTimer;
-class TheoraAudioInterface;
-class TheoraWorkerThread;
-class TheoraDataSource;
-class TheoraVideoFrame;
-
-/**
-    format of the TheoraVideoFrame pixels. Affects decoding time
- */
-enum TheoraOutputMode
-{
-	// A = full alpha (255), order of letters represents the byte order for a pixel
-	// A means the image is treated as if it contains an alpha channel, while X formats
-	// just mean that RGB frame is transformed to a 4 byte format
-	TH_UNDEFINED = 0,
-	TH_RGB    =  1,
-	TH_RGBA   =  2,
-	TH_RGBX   =  3,
-	TH_ARGB   =  4,
-	TH_XRGB   =  5,
-	TH_BGR    =  6,
-	TH_BGRA   =  7,
-	TH_BGRX   =  8,
-	TH_ABGR   =  9,
-	TH_XBGR   = 10,
-	TH_GREY   = 11,
-	TH_GREY3  = 12,
-	TH_GREY3A = 13,
-	TH_GREY3X = 14,
-	TH_AGREY3 = 15,
-	TH_XGREY3 = 16,
-	TH_YUV    = 17,
-	TH_YUVA   = 18,
-	TH_YUVX   = 19,
-	TH_AYUV   = 20,
-	TH_XYUV   = 21
-};
-
-/**
-	This object contains all data related to video playback, eg. the open source file,
-	the frame queue etc.
-*/
-class TheoraPlayerExport TheoraVideoClip
-{
-	friend class TheoraWorkerThread;
-	friend class TheoraVideoFrame;
-	friend class TheoraVideoManager;
-protected:
-	TheoraFrameQueue* mFrameQueue;
-	TheoraAudioInterface* mAudioInterface;
-	TheoraDataSource* mStream;
-
-	TheoraTimer *mTimer, *mDefaultTimer;
-
-	TheoraWorkerThread* mAssignedWorkerThread;
-	
-	bool mUseAlpha;
-	
-	bool mWaitingForCache;
-	
-	// benchmark vars
-	int mNumDroppedFrames, mNumDisplayedFrames, mNumPrecachedFrames;
-
-	int mThreadAccessCount; //! counter used by TheoraVideoManager to schedule workload
-	
-	int mSeekFrame; //! stores desired seek position as a frame number. next worker thread will do the seeking and reset this var to -1
-	float mDuration, mFrameDuration, mFPS;
-	float mPriority; //! User assigned priority. Default value is 1
-    std::string mName;
-	int mWidth, mHeight, mStride;
-	int mNumFrames;
-	int audio_track;
-
-	int mSubFrameWidth, mSubFrameHeight, mSubFrameOffsetX, mSubFrameOffsetY;
-	float mAudioGain; //! multiplier for audio samples. between 0 and 1
-
-	TheoraOutputMode mOutputMode, mRequestedOutputMode;
-	bool mFirstFrameDisplayed;
-	bool mAutoRestart;
-	bool mEndOfFile, mRestarted;
-	int mIteration, mPlaybackIteration; //! used to ensure smooth playback of looping videos
-
-	TheoraMutex* mAudioMutex; //! syncs audio decoding and extraction
-	TheoraMutex* mThreadAccessMutex;
-	
-	/**
-	 * Get the priority of a video clip. based on a forumula that includes user
-	 * priority factor, whether the video is paused or not, how many precached
-	 * frames it has etc.
-	 * This function is used in TheoraVideoManager to efficiently distribute job
-	 * assignments among worker threads
-	 * @return priority number of this video clip
-	 */
-	int calculatePriority();
-	void readTheoraVorbisHeaders();
-	virtual void doSeek() = 0; //! called by WorkerThread to seek to mSeekFrame
-	virtual bool _readData() = 0;
-	bool isBusy();
-
-	/**
-	 * decodes audio from the vorbis stream and stores it in audio packets
-	 * This is an internal function of TheoraVideoClip, called regularly if playing an
-	 * audio enabled video clip.
-	 * @return last decoded timestamp (if found in decoded packet's granule position)
-	 */
-	virtual float decodeAudio() = 0;
-    
-    int _getNumReadyFrames();
-    void resetFrameQueue();
-    int discardOutdatedFrames(float absTime);
-    float getAbsPlaybackTime();
-	virtual void load(TheoraDataSource* source) = 0;
-
-	virtual void _restart() = 0; // resets the decoder and stream but leaves the frame queue intact
-public:
-	TheoraVideoClip(TheoraDataSource* data_source,
-		            TheoraOutputMode output_mode,
-					int nPrecachedFrames,
-					bool usePower2Stride);
-	virtual ~TheoraVideoClip();
-
-	std::string getName();
-	//! Returns the string name of the decoder backend (eg. Theora, AVFoundation)
-	virtual std::string getDecoderName() = 0;
-
-	//! benchmark function
-	int getNumDisplayedFrames() { return mNumDisplayedFrames; }
-	//! benchmark function
-	int getNumDroppedFrames() { return mNumDroppedFrames; }
-
-	//! return width in pixels of the video clip
-	int getWidth();
-	//! return height in pixels of the video clip
-	int getHeight();
-    
-    //! Width of the actual picture inside a video frame (depending on implementation, this may be equal to mWidth or differ within a codec block size (usually 16))
-    int getSubFrameWidth();
-    //! Height of the actual picture inside a video frame (depending on implementation, this may be equal to mHeight or differ within a codec block size (usually 16))
-	int getSubFrameHeight();
-    //! X Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or within a codec block size (usually 16))
-	int getSubFrameOffsetX();
-    //! Y Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or differ within a codec block size (usually 16))
-	int getSubFrameOffsetY();
-    /**
-	    \brief return stride in pixels
-
-		If you've specified usePower2Stride when creating the TheoraVideoClip object
-		then this value will be the next power of two size compared to width,
-		eg: w=376, stride=512.
-
-		Otherwise, stride will be equal to width
-	 */
-	int getStride() { return mStride; }
-
-	//! retur the timer objet associated with this object
-	TheoraTimer* getTimer();
-	//! replace the timer object with a new one
-	void setTimer(TheoraTimer* timer);
-
-	//! used by TheoraWorkerThread, do not call directly
-	virtual bool decodeNextFrame() = 0;
-
-	//! advance time. TheoraVideoManager calls this
-	void update(float timeDelta);
-	/**
-	    \brief update timer to the display time of the next frame
-
-		useful if you want to grab frames instead of regular display
-		\return time advanced. 0 if no frames are ready
-	*/
-	float updateToNextFrame();
-
-	
-	TheoraFrameQueue* getFrameQueue();
-	
-	/**
-	    \brief pop the frame from the front of the FrameQueue
-
-		see TheoraFrameQueue::pop() for more details
-	 */
-	void popFrame();
-
-	/**
-	    \brief Returns the first available frame in the queue or NULL if no frames are available.
-
-		see TheoraFrameQueue::getFirstAvailableFrame() for more details
-	*/
-	TheoraVideoFrame* getNextFrame();
-	/**
-	    check if there is enough audio data decoded to submit to the audio interface
-
-		TheoraWorkerThread calls this
-	 */
-	virtual void decodedAudioCheck() = 0;
-
-	void setAudioInterface(TheoraAudioInterface* iface);
-	TheoraAudioInterface* getAudioInterface();
-
-	/**
-	    \brief resize the frame queues
-
-		Warning: this call discards ready frames in the frame queue
-	 */
-	void setNumPrecachedFrames(int n);
-	//! returns the size of the frame queue
-	int getNumPrecachedFrames();
-	//! returns the number of ready frames in the frame queue
-	int getNumReadyFrames();
-
-	//! if you want to adjust the audio gain. range [0,1]
-	void setAudioGain(float gain);
-	float getAudioGain();
-
-	//! if you want the video to automatically and smoothly restart when the last frame is reached
-	void setAutoRestart(bool value);
-	bool getAutoRestart() { return mAutoRestart; }
-
-
-	void set_audio_track(int p_track) { audio_track=p_track; }
-
-	/**
-	    TODO: user priority. Useful only when more than one video is being decoded
-	 */
-	void setPriority(float priority);
-	float getPriority();
-
-	//! Used by TheoraVideoManager to schedule work
-	float getPriorityIndex();
-
-	//! get the current time index from the timer object
-	float getTimePosition();
-	//! get the duration of the movie in seconds
-	float getDuration();
-	//! return the clips' frame rate, warning, fps can be a non integer number!
-	float getFPS();
-	//! get the number of frames in this movie
-	int getNumFrames() { return mNumFrames; }
-
-	//! return the current output mode for this video object
-	TheoraOutputMode getOutputMode();
-	/**
-	    set a new output mode
-
-		Warning: this discards the frame queue. ready frames will be lost.
-	 */
-	void setOutputMode(TheoraOutputMode mode);
-
-    bool isDone();
-	void play();
-	void pause();
-	void restart();
-	bool isPaused();
-	void stop();
-    void setPlaybackSpeed(float speed);
-    float getPlaybackSpeed();
-	//! seek to a given time position
-	void seek(float time);
-	//! seek to a given frame number
-	void seekToFrame(int frame);
-	//! wait max_time for the clip to cache a given percentage of frames, factor in range [0,1]
-	void waitForCache(float desired_cache_factor = 0.5f, float max_wait_time = 1.0f);
-};
-
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h
deleted file mode 100644
index 5d27f54d1c..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraVideoFrame_h
-#define _TheoraVideoFrame_h
-
-#include "TheoraExport.h"
-#include "TheoraVideoClip.h"
-
-struct TheoraPixelTransform;
-/**
-	
-*/
-class TheoraPlayerExport TheoraVideoFrame
-{
-protected:
-	TheoraVideoClip* mParent;
-	unsigned char* mBuffer;
-	unsigned long mFrameNumber;
-public:
-	//! global time in seconds this frame should be displayed on
-	float mTimeToDisplay;
-	//! whether the frame is ready for display or not
-	bool mReady;
-	//! indicates the frame is being used by TheoraWorkerThread instance
-	bool mInUse;
-	//! used to keep track of linear time in looping videos
-	int mIteration;
-	
-	int mBpp;
-
-	TheoraVideoFrame(TheoraVideoClip* parent);
-	virtual ~TheoraVideoFrame();
-
-	//! internal function, do not use directly
-	void _setFrameNumber(unsigned long number) { mFrameNumber = number; }
-	//! returns the frame number of this frame in the theora stream
-	unsigned long getFrameNumber() { return mFrameNumber; }
-
-	void clear();
-
-	int getWidth();
-	int getStride();
-	int getHeight();
-
-	unsigned char* getBuffer();
-
-	//! Called by TheoraVideoClip to decode a source buffer onto itself
-	virtual void decode(struct TheoraPixelTransform* t);
-};
-#endif
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h
deleted file mode 100644
index d94c51b4d4..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#ifndef _TheoraVideoManager_h
-#define _TheoraVideoManager_h
-
-#include <vector>
-#include <list>
-#include <string>
-#include "TheoraExport.h"
-#include "TheoraVideoClip.h"
-#ifdef _WIN32
-#pragma warning( disable: 4251 ) // MSVC++
-#endif
-// forward class declarations
-class TheoraWorkerThread;
-class TheoraMutex;
-class TheoraDataSource;
-class TheoraAudioInterfaceFactory;
-/**
-	This is the main singleton class that handles all playback/sync operations
-*/
-class TheoraPlayerExport TheoraVideoManager
-{
-protected:
-	friend class TheoraWorkerThread;
-	typedef std::vector<TheoraVideoClip*> ClipList;
-	typedef std::vector<TheoraWorkerThread*> ThreadList;
-
-	//! stores pointers to worker threads which are decoding video and audio
-	ThreadList mWorkerThreads;
-	//! stores pointers to created video clips
-	ClipList mClips;
-	
-	//! stores pointer to clips that were docoded in the past in order to achieve fair scheduling
-	std::list<TheoraVideoClip*> mWorkLog;
-
-	int mDefaultNumPrecachedFrames;
-
-	TheoraMutex* mWorkMutex;
-	TheoraAudioInterfaceFactory* mAudioFactory;
-
-	void createWorkerThreads(int n);
-	void destroyWorkerThreads();
-	
-	float calcClipWorkTime(TheoraVideoClip* clip);
-
-	/**
-	 * Called by TheoraWorkerThread to request a TheoraVideoClip instance to work on decoding
-	 */
-	TheoraVideoClip* requestWork(TheoraWorkerThread* caller);
-public:
-	TheoraVideoManager(int num_worker_threads=1);
-	virtual ~TheoraVideoManager();
-
-	//! get the global reference to the manager instance
-	static TheoraVideoManager& getSingleton();
-	//! get the global pointer to the manager instance
-	static TheoraVideoManager* getSingletonPtr();
-
-	//! search registered clips by name
-	TheoraVideoClip* getVideoClipByName(std::string name);
-
-	TheoraVideoClip* createVideoClip(std::string filename,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_track=0);
-	TheoraVideoClip* createVideoClip(TheoraDataSource* data_source,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_audio_track=0);
-
-	void update(float timeDelta);
-
-	void destroyVideoClip(TheoraVideoClip* clip);
-
-	void setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory);
-	TheoraAudioInterfaceFactory* getAudioInterfaceFactory();
-
-	int getNumWorkerThreads();
-	void setNumWorkerThreads(int n);
-
-	void setDefaultNumPrecachedFrames(int n) { mDefaultNumPrecachedFrames=n; }
-	int getDefaultNumPrecachedFrames() { return mDefaultNumPrecachedFrames; }
-
-	//! used by libtheoraplayer functions
-	void logMessage(std::string msg);
-
-	/**
-		\brief you can set your own log function to recieve theora's log calls
-
-		This way you can integrate libtheoraplayer's log messages in your own
-		logging system, prefix them, mute them or whatever you want
-	 */
-	static void setLogFunction(void (*fn)(std::string));
-
-	//! get nicely formated version string
-	std::string getVersionString();
-	/**
-	    \brief get version numbers
-
-		if c is negative, it means it's a release candidate -c
-	 */
-	void getVersion(int* a,int* b,int* c);
-
-	//! returns the supported decoders (eg. Theora, AVFoundation...)
-	std::vector<std::string> getSupportedDecoders();
-};
-#endif
-
diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h b/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h
deleted file mode 100644
index 2299acedbd..0000000000
--- a/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _TheoraWorkerThread_h
-#define _TheoraWorkerThread_h
-
-#include "TheoraAsync.h"
-
-class TheoraVideoClip;
-
-/**
-	This is the worker thread, requests work from TheoraVideoManager
-	and decodes assigned TheoraVideoClip objects
-*/
-class TheoraWorkerThread : public TheoraThread
-{
-	TheoraVideoClip* mClip;
-public:
-	TheoraWorkerThread();
-	~TheoraWorkerThread();
-
-	TheoraVideoClip* getAssignedClip() { return mClip; }
-
-    //! Main Thread Body - do not call directly!
-	void execute();
-};
-#endif
diff --git a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h b/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h
deleted file mode 100644
index abd898aa01..0000000000
--- a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#if defined(__AVFOUNDATION) && !defined(_TheoraVideoClip_AVFoundation_h)
-#define _TheoraVideoClip_AVFoundation_h
-
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraVideoClip.h"
-
-#ifndef AVFOUNDATION_CLASSES_DEFINED
-class AVAssetReader;
-class AVAssetReaderTrackOutput;
-#endif
-
-class TheoraVideoClip_AVFoundation : public TheoraVideoClip, public TheoraAudioPacketQueue
-{
-protected:
-	bool mLoaded;
-	int mFrameNumber;
-	AVAssetReader* mReader;
-	AVAssetReaderTrackOutput *mOutput, *mAudioOutput;
-	unsigned int mReadAudioSamples;
-	
-	void unload();
-	void doSeek();
-public:
-	TheoraVideoClip_AVFoundation(TheoraDataSource* data_source,
-								 TheoraOutputMode output_mode,
-								 int nPrecachedFrames,
-								 bool usePower2Stride);
-	~TheoraVideoClip_AVFoundation();
-	
-	bool _readData();
-	bool decodeNextFrame();
-	void _restart();
-	void load(TheoraDataSource* source);
-	float decodeAudio();
-	void decodedAudioCheck();
-	std::string getDecoderName() { return "AVFoundation"; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm b/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm
deleted file mode 100644
index 1b5cf0ab13..0000000000
--- a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm
+++ /dev/null
@@ -1,457 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef __AVFOUNDATION
-#define AVFOUNDATION_CLASSES_DEFINED
-#import <AVFoundation/AVFoundation.h>
-#include "TheoraAudioInterface.h"
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#include "TheoraTimer.h"
-#include "TheoraUtil.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip_AVFoundation.h"
-#include "TheoraPixelTransform.h"
-
-#ifdef _AVFOUNDATION_BGRX
-// a fast function developed to use kernel byte swapping calls to optimize alpha decoding.
-// In AVFoundation, BGRX mode conversion is prefered to YUV conversion because apple's YUV
-// conversion on iOS seems to run faster than libtheoraplayer's implementation
-// This may change in the future with more optimizations to libtheoraplayers's YUV conversion
-// code, making this function obsolete.
-static void bgrx2rgba(unsigned char* dest, int w, int h, struct TheoraPixelTransform* t)
-{
-	unsigned register int a;
-	unsigned int *dst = (unsigned int*) dest, *dstEnd;
-	unsigned char* src = t->raw;
-	int y, x, ax;
-	
-	for (y = 0; y < h; ++y, src += t->rawStride)
-	{
-		for (x = 0, ax = w * 4, dstEnd = dst + w; dst != dstEnd; x += 4, ax += 4, ++dst)
-		{
-            // use the full alpha range here because the Y channel has already been converted
-            // to RGB and that's in [0, 255] range.
-			a = src[ax];
-            *dst = (OSReadSwapInt32(src, x) >> 8) | (a << 24);
-		}
-	}
-}
-#endif
-
-static CVPlanarPixelBufferInfo_YCbCrPlanar getYUVStruct(void* src)
-{
-	CVPlanarPixelBufferInfo_YCbCrPlanar* bigEndianYuv = (CVPlanarPixelBufferInfo_YCbCrPlanar*) src;
-	CVPlanarPixelBufferInfo_YCbCrPlanar yuv;
-	yuv.componentInfoY.offset = OSSwapInt32(bigEndianYuv->componentInfoY.offset);
-	yuv.componentInfoY.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoY.rowBytes);
-	yuv.componentInfoCb.offset = OSSwapInt32(bigEndianYuv->componentInfoCb.offset);
-	yuv.componentInfoCb.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoCb.rowBytes);
-	yuv.componentInfoCr.offset = OSSwapInt32(bigEndianYuv->componentInfoCr.offset);
-	yuv.componentInfoCr.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoCr.rowBytes);
-	return yuv;
-}
-
-TheoraVideoClip_AVFoundation::TheoraVideoClip_AVFoundation(TheoraDataSource* data_source,
-											   TheoraOutputMode output_mode,
-											   int nPrecachedFrames,
-											   bool usePower2Stride):
-	TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
-	TheoraAudioPacketQueue()
-{
-	mLoaded = 0;
-	mReader = NULL;
-	mOutput = mAudioOutput = NULL;
-	mReadAudioSamples = mAudioFrequency = mNumAudioChannels = 0;
-}
-
-TheoraVideoClip_AVFoundation::~TheoraVideoClip_AVFoundation()
-{
-	unload();
-}
-
-void TheoraVideoClip_AVFoundation::unload()
-{
-	if (mOutput != NULL || mAudioOutput != NULL || mReader != NULL)
-	{
-		NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
-
-		if (mOutput != NULL)
-		{
-			[mOutput release];
-			mOutput = NULL;
-		}
-		
-		if (mAudioOutput)
-		{
-			[mAudioOutput release];
-			mAudioOutput = NULL;
-		}
-		
-		if (mReader != NULL)
-		{
-			[mReader release];
-			mReader = NULL;
-		}
-		
-		[pool release];
-	}
-}
-
-bool TheoraVideoClip_AVFoundation::_readData()
-{
-	return 1;
-}
-
-bool TheoraVideoClip_AVFoundation::decodeNextFrame()
-{
-	if (mReader == NULL || mEndOfFile) return 0;
-	AVAssetReaderStatus status = [mReader status];
-	if (status == AVAssetReaderStatusFailed)
-	{
-		// This can happen on iOS when you suspend the app... Only happens on the device, iOS simulator seems to work fine.
-		th_writelog("AVAssetReader reading failed, restarting...");
-
-		mSeekFrame = mTimer->getTime() * mFPS;
-		// just in case
-		if (mSeekFrame < 0) mSeekFrame = 0;
-		if (mSeekFrame > mDuration * mFPS - 1) mSeekFrame = mDuration * mFPS - 1;
-		_restart();
-		status = [mReader status];
-		if (status == AVAssetReaderStatusFailed)
-		{
-			th_writelog("AVAssetReader restart failed!");
-			return 0;
-		}
-		th_writelog("AVAssetReader restart succeeded!");
-	}
-
-	TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
-	if (!frame) return 0;
-
-	CMSampleBufferRef sampleBuffer = NULL;
-	NSAutoreleasePool* pool = NULL;
-	CMTime presentationTime;
-	
-	if (mAudioInterface) decodeAudio();
-	
-	if (status == AVAssetReaderStatusReading)
-	{
-		pool = [[NSAutoreleasePool alloc] init];
-		
-		while ((sampleBuffer = [mOutput copyNextSampleBuffer]))
-		{
-			presentationTime = CMSampleBufferGetOutputPresentationTimeStamp(sampleBuffer);
-			frame->mTimeToDisplay = (float) CMTimeGetSeconds(presentationTime);
-			frame->mIteration = mIteration;
-			frame->_setFrameNumber(mFrameNumber);
-			++mFrameNumber;
-			if (frame->mTimeToDisplay < mTimer->getTime() && !mRestarted && mFrameNumber % 16 != 0)
-			{
-				// %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand.
-#ifdef _DEBUG
-				th_writelog(mName + ": pre-dropped frame " + str(mFrameNumber - 1));
-#endif
-				++mNumDisplayedFrames;
-				++mNumDroppedFrames;
-				CMSampleBufferInvalidate(sampleBuffer);
-				CFRelease(sampleBuffer);
-				sampleBuffer = NULL;
-				continue; // drop frame
-			}
-
-			CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
-			CVPixelBufferLockBaseAddress(imageBuffer, 0);
-			void *baseAddress = CVPixelBufferGetBaseAddress(imageBuffer);
-			
-			mStride = CVPixelBufferGetBytesPerRow(imageBuffer);
-			size_t width = CVPixelBufferGetWidth(imageBuffer);
-			size_t height = CVPixelBufferGetHeight(imageBuffer);
-
-			TheoraPixelTransform t;
-			memset(&t, 0, sizeof(TheoraPixelTransform));
-#ifdef _AVFOUNDATION_BGRX
-			if (mOutputMode == TH_BGRX || mOutputMode == TH_RGBA)
-			{
-				t.raw = (unsigned char*) baseAddress;
-				t.rawStride = mStride;
-			}
-			else
-#endif
-			{
-				CVPlanarPixelBufferInfo_YCbCrPlanar yuv = getYUVStruct(baseAddress);
-				
-				t.y = (unsigned char*) baseAddress + yuv.componentInfoY.offset;  t.yStride = yuv.componentInfoY.rowBytes;
-				t.u = (unsigned char*) baseAddress + yuv.componentInfoCb.offset; t.uStride = yuv.componentInfoCb.rowBytes;
-				t.v = (unsigned char*) baseAddress + yuv.componentInfoCr.offset; t.vStride = yuv.componentInfoCr.rowBytes;
-			}
-#ifdef _AVFOUNDATION_BGRX
-			if (mOutputMode == TH_RGBA)
-			{
-				for (int i = 0; i < 1000; ++i)
-					bgrx2rgba(frame->getBuffer(), mWidth / 2, mHeight, &t);
-				frame->mReady = true;
-			}
-			else
-#endif
-			frame->decode(&t);
-
-			CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
-			CMSampleBufferInvalidate(sampleBuffer);
-			CFRelease(sampleBuffer);
-
-			break; // TODO - should this really be a while loop instead of an if block?
-		}
-	}
-	if (pool) [pool release];
-
-	if (!frame->mReady) // in case the frame wasn't used
-	{
-		frame->mInUse = 0;
-	}
-
-	if (sampleBuffer == NULL && mReader.status == AVAssetReaderStatusCompleted) // other cases could be app suspended
-	{
-		if (mAutoRestart)
-        {
-            ++mIteration;
-			_restart();
-        }
-		else
-		{
-			unload();
-			mEndOfFile = true;
-		}
-		return 0;
-	}
-	
-	
-	return 1;
-}
-
-void TheoraVideoClip_AVFoundation::_restart()
-{
-	mEndOfFile = false;
-	unload();
-	load(mStream);
-	mRestarted = true;
-}
-
-void TheoraVideoClip_AVFoundation::load(TheoraDataSource* source)
-{
-	mStream = source;
-	mFrameNumber = 0;
-	mEndOfFile = false;
-	TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(source);
-	std::string filename;
-	if (fileDataSource != NULL) filename = fileDataSource->getFilename();
-	else
-	{
-		TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(source);
-		if (memoryDataSource != NULL) filename = memoryDataSource->getFilename();
-		else throw TheoraGenericException("Unable to load MP4 file");
-	}
-	
-	NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
-	NSString* path = [NSString stringWithUTF8String:filename.c_str()];
-	NSError* err;
-	NSURL *url = [NSURL fileURLWithPath:path];
-	AVAsset* asset = [[AVURLAsset alloc] initWithURL:url options:nil];
-	mReader = [[AVAssetReader alloc] initWithAsset:asset error:&err];
-	NSArray* tracks = [asset tracksWithMediaType:AVMediaTypeVideo];
-	if ([tracks count] == 0)
-		throw TheoraGenericException("Unable to open video file: " + filename);
-	AVAssetTrack *videoTrack = [tracks objectAtIndex:0];
-
-	NSArray* audioTracks = [asset tracksWithMediaType:AVMediaTypeAudio];
-	if (audio_track >= audioTracks.count)
-		audio_track = 0;
-	AVAssetTrack *audioTrack = audioTracks.count > 0 ? [audioTracks objectAtIndex:audio_track] : NULL;
-	printf("*********** using audio track %i\n", audio_track);
-	
-#ifdef _AVFOUNDATION_BGRX
-	bool yuv_output = (mOutputMode != TH_BGRX && mOutputMode != TH_RGBA);
-#else
-	bool yuv_output = true;
-#endif
-	
-	NSDictionary *videoOptions = [NSDictionary dictionaryWithObjectsAndKeys:[NSNumber numberWithInt:(yuv_output) ? kCVPixelFormatType_420YpCbCr8Planar : kCVPixelFormatType_32BGRA], kCVPixelBufferPixelFormatTypeKey, nil];
-
-	mOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:videoTrack outputSettings:videoOptions];
-	[mReader addOutput:mOutput];
-	if ([mOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0
-		mOutput.alwaysCopiesSampleData = NO;
-
-	mFPS = videoTrack.nominalFrameRate;
-	mWidth = mSubFrameWidth = mStride = videoTrack.naturalSize.width;
-	mHeight = mSubFrameHeight = videoTrack.naturalSize.height;
-	mFrameDuration = 1.0f / mFPS;
-	mDuration = (float) CMTimeGetSeconds(asset.duration);
-	if (mFrameQueue == NULL)
-	{
-		mFrameQueue = new TheoraFrameQueue(this);
-		mFrameQueue->setSize(mNumPrecachedFrames);
-	}
-
-	if (mSeekFrame != -1)
-	{
-		mFrameNumber = mSeekFrame;
-		[mReader setTimeRange: CMTimeRangeMake(CMTimeMakeWithSeconds(mSeekFrame / mFPS, 1), kCMTimePositiveInfinity)];
-	}
-	if (audioTrack)
-	{
-		TheoraAudioInterfaceFactory* audio_factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory();
-		if (audio_factory)
-		{
-			NSDictionary *audioOptions = [NSDictionary dictionaryWithObjectsAndKeys:
-										  [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
-										  [NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved,
-										  [NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey,
-										  [NSNumber numberWithBool:YES], AVLinearPCMIsFloatKey,
-										  [NSNumber numberWithInt:32], AVLinearPCMBitDepthKey,
-										  nil];
-
-			mAudioOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:audioTrack outputSettings:audioOptions];
-			[mReader addOutput:mAudioOutput];
-			if ([mAudioOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0
-				mAudioOutput.alwaysCopiesSampleData = NO;
-			
-			NSArray* desclst = audioTrack.formatDescriptions;
-			CMAudioFormatDescriptionRef desc = (CMAudioFormatDescriptionRef) [desclst objectAtIndex:0];
-			const AudioStreamBasicDescription* audioDesc = CMAudioFormatDescriptionGetStreamBasicDescription(desc);
-			mAudioFrequency = (unsigned int) audioDesc->mSampleRate;
-			mNumAudioChannels = audioDesc->mChannelsPerFrame;
-			
-			if (mSeekFrame != -1)
-			{
-				mReadAudioSamples = mFrameNumber * (mAudioFrequency * mNumAudioChannels) / mFPS;
-			}
-			else mReadAudioSamples = 0;
-
-			if (mAudioInterface == NULL)
-				setAudioInterface(audio_factory->createInstance(this, mNumAudioChannels, mAudioFrequency));
-		}
-	}
-	
-#ifdef _DEBUG
-	else if (!mLoaded)
-	{
-		th_writelog("-----\nwidth: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS()));
-		th_writelog("duration: " + strf(mDuration) + " seconds\n-----");
-	}
-#endif
-	[mReader startReading];
-	[pool release];
-	mLoaded = true;
-}
- 
-void TheoraVideoClip_AVFoundation::decodedAudioCheck()
-{
-	if (!mAudioInterface || mTimer->isPaused()) return;
-	
-	mAudioMutex->lock();
-	flushAudioPackets(mAudioInterface);
-	mAudioMutex->unlock();
-}
-
-float TheoraVideoClip_AVFoundation::decodeAudio()
-{
-	if (mRestarted) return -1;
-
-	if (mReader == NULL || mEndOfFile) return 0;
-	AVAssetReaderStatus status = [mReader status];
-
-	if (mAudioOutput)
-	{
-		CMSampleBufferRef sampleBuffer = NULL;
-		NSAutoreleasePool* pool = NULL;
-		bool mutexLocked = 0;
-
-		float factor = 1.0f / (mAudioFrequency * mNumAudioChannels);
-		float videoTime = (float) mFrameNumber / mFPS;
-		float min = mFrameQueue->getSize() / mFPS + 1.0f;
-		
-		if (status == AVAssetReaderStatusReading)
-		{
-			pool = [[NSAutoreleasePool alloc] init];
-
-			// always buffer up of audio ahead of the frames
-			while (mReadAudioSamples * factor - videoTime < min)
-			{
-				if ((sampleBuffer = [mAudioOutput copyNextSampleBuffer]))
-				{
-					AudioBufferList audioBufferList;
-					
-					CMBlockBufferRef blockBuffer = NULL;
-					CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(sampleBuffer, NULL, &audioBufferList, sizeof(audioBufferList), NULL, NULL, 0, &blockBuffer);
-					
-					for (int y = 0; y < audioBufferList.mNumberBuffers; ++y)
-					{
-						AudioBuffer audioBuffer = audioBufferList.mBuffers[y];
-						float *frame = (float*) audioBuffer.mData;
-
-						if (!mutexLocked)
-						{
-							mAudioMutex->lock();
-							mutexLocked = 1;
-						}
-						addAudioPacket(frame, audioBuffer.mDataByteSize / (mNumAudioChannels * sizeof(float)), mAudioGain);
-						
-						mReadAudioSamples += audioBuffer.mDataByteSize / (sizeof(float));
-					}
-
-					CFRelease(blockBuffer);
-					CMSampleBufferInvalidate(sampleBuffer);
-					CFRelease(sampleBuffer);
-				}
-				else
-				{
-					[mAudioOutput release];
-					mAudioOutput = nil;
-					break;
-				}
-			}
-			[pool release];
-		}
-		if (mutexLocked) mAudioMutex->unlock();
-	}
-	
-	return -1;
-}
-
-void TheoraVideoClip_AVFoundation::doSeek()
-{
-#if _DEBUG
-	th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame));
-#endif
-	int frame;
-	float time = mSeekFrame / getFPS();
-	mTimer->seek(time);
-	bool paused = mTimer->isPaused();
-	if (!paused) mTimer->pause(); // pause until seeking is done
-	
-	mEndOfFile = false;
-	mRestarted = false;
-	
-    resetFrameQueue();
-	unload();
-	load(mStream);
-
-	if (mAudioInterface)
-	{
-		mAudioMutex->lock();
-		destroyAllAudioPackets();
-		mAudioMutex->unlock();
-	}
-
-	if (!paused) mTimer->play();
-	mSeekFrame = -1;
-}
-#endif
diff --git a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp b/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp
deleted file mode 100644
index fa3fd43a47..0000000000
--- a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp
+++ /dev/null
@@ -1,439 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef __FFMPEG
-#include "TheoraAudioInterface.h"
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#include "TheoraTimer.h"
-#include "TheoraUtil.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip_FFmpeg.h"
-#include "TheoraPixelTransform.h"
-
-#define READ_BUFFER_SIZE 4096
-
-#ifdef __cplusplus
-#define __STDC_CONSTANT_MACROS
-#ifdef _STDINT_H
-#undef _STDINT_H
-#endif
-# include <stdint.h>
-#endif
-
-#define _FFMPEG_DEBUG
-
-extern "C"
-{
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
-#include "libavutil/avassert.h"
-}
-
-static bool ffmpegInitialised = 0;
-
-static int readFunction(void* data, uint8_t* buf, int buf_size)
-{
-#ifdef _FFMPEG_DEBUG
-	th_writelog("reading " + str(buf_size) + " bytes");
-#endif
-
-	TheoraDataSource* src = (TheoraDataSource*) data;
-	return src->read(buf, buf_size);
-}
-
-static int64_t seekFunction(void* data, int64_t offset, int whence)
-{
-#ifdef _FFMPEG_DEBUG
-	th_writelog("seeking: offset = " + str((long) offset) + ", whence = " + str(whence));
-#endif
-
-	TheoraDataSource* src = (TheoraDataSource*) data;
-    if (whence == AVSEEK_SIZE)
-		return src->size();
-	else if (whence == SEEK_SET)
-		src->seek((long) offset);
-	else if (whence == SEEK_END)
-		src->seek(src->size() - (long) offset);
-    return src->tell();
-}
-
-static void avlog_theoraplayer(void* p, int level, const char* fmt, va_list vargs)
-{
-	th_writelog(fmt);
-	static char logstr[2048];
-	vsprintf(logstr, fmt, vargs);
-	th_writelog("ffmpeg: " + std::string(logstr));
-}
-
-
-std::string text;
-
-static void _log(const char* s)
-{
-	text += s;
-//	th_writelog(text);
-//	text = "";
-}
-
-static void _log(const char c)
-{
-	char s[2] = {c, 0};
-	_log(s);
-}
-
-static const AVCodec *next_codec_for_id(enum AVCodecID id, const AVCodec *prev,
-                                        int encoder)
-{
-    while ((prev = av_codec_next(prev))) {
-        if (prev->id == id &&
-            (encoder ? av_codec_is_encoder(prev) : av_codec_is_decoder(prev)))
-            return prev;
-    }
-    return NULL;
-}
-
-static int compare_codec_desc(const void *a, const void *b)
-{
-    const AVCodecDescriptor **da = (const AVCodecDescriptor **) a;
-    const AVCodecDescriptor **db = (const AVCodecDescriptor **) b;
-	
-    return (*da)->type != (*db)->type ? (*da)->type - (*db)->type :
-	strcmp((*da)->name, (*db)->name);
-}
-
-static unsigned get_codecs_sorted(const AVCodecDescriptor ***rcodecs)
-{
-    const AVCodecDescriptor *desc = NULL;
-    const AVCodecDescriptor **codecs;
-    unsigned nb_codecs = 0, i = 0;
-	
-    while ((desc = avcodec_descriptor_next(desc)))
-        ++nb_codecs;
-    if (!(codecs = (const AVCodecDescriptor**) av_calloc(nb_codecs, sizeof(*codecs)))) {
-        av_log(NULL, AV_LOG_ERROR, "Out of memory\n");
-        exit(1);
-    }
-    desc = NULL;
-    while ((desc = avcodec_descriptor_next(desc)))
-        codecs[i++] = desc;
-    av_assert0(i == nb_codecs);
-    qsort(codecs, nb_codecs, sizeof(*codecs), compare_codec_desc);
-    *rcodecs = codecs;
-    return nb_codecs;
-}
-
-static char get_media_type_char(enum AVMediaType type)
-{
-    switch (type) {
-        case AVMEDIA_TYPE_VIDEO:    return 'V';
-        case AVMEDIA_TYPE_AUDIO:    return 'A';
-        case AVMEDIA_TYPE_DATA:     return 'D';
-        case AVMEDIA_TYPE_SUBTITLE: return 'S';
-        case AVMEDIA_TYPE_ATTACHMENT:return 'T';
-        default:                    return '?';
-    }
-}
-
-static void print_codecs_for_id(enum AVCodecID id, int encoder)
-{
-    const AVCodec *codec = NULL;
-	
-    _log(encoder ? "encoders" : "decoders");
-	
-    while ((codec = next_codec_for_id(id, codec, encoder)))
-        _log(codec->name);
-	
-    _log(")");
-}
-
-int show_codecs(void *optctx, const char *opt, const char *arg)
-{
-    const AVCodecDescriptor **codecs;
-    unsigned i, nb_codecs = get_codecs_sorted(&codecs);
-	
-	char tmp[1024];
-    th_writelog("Codecs:\n"
-           " D..... = Decoding supported\n"
-           " .E.... = Encoding supported\n"
-           " ..V... = Video codec\n"
-           " ..A... = Audio codec\n"
-           " ..S... = Subtitle codec\n"
-           " ...I.. = Intra frame-only codec\n"
-           " ....L. = Lossy compression\n"
-           " .....S = Lossless compression\n"
-           " -------\n");
-    for (i = 0; i < nb_codecs; ++i) {
-        const AVCodecDescriptor *desc = codecs[i];
-        const AVCodec *codec = NULL;
-		
-        _log(" ");
-        _log(avcodec_find_decoder(desc->id) ? "D" : ".");
-        _log(avcodec_find_encoder(desc->id) ? "E" : ".");
-		
-        _log(get_media_type_char(desc->type));
-        _log((desc->props & AV_CODEC_PROP_INTRA_ONLY) ? "I" : ".");
-        _log((desc->props & AV_CODEC_PROP_LOSSY)      ? "L" : ".");
-        _log((desc->props & AV_CODEC_PROP_LOSSLESS)   ? "S" : ".");
-		
-		
-        sprintf(tmp, " %-20s %s", desc->name, desc->long_name ? desc->long_name : "");
-		
-		_log(tmp);
-        /* print decoders/encoders when there's more than one or their
-         * names are different from codec name */
-        while ((codec = next_codec_for_id(desc->id, codec, 0))) {
-            if (strcmp(codec->name, desc->name)) {
-                print_codecs_for_id(desc->id, 0);
-                break;
-            }
-        }
-        codec = NULL;
-        while ((codec = next_codec_for_id(desc->id, codec, 1))) {
-            if (strcmp(codec->name, desc->name)) {
-                print_codecs_for_id(desc->id, 1);
-                break;
-            }
-        }
-		_log("\n");
-    }
-    av_free(codecs);
-	
-	av_log(0, 0, "%s", text.c_str());
-    return 0;
-}
-
-TheoraVideoClip_FFmpeg::TheoraVideoClip_FFmpeg(TheoraDataSource* data_source,
-														 TheoraOutputMode output_mode,
-														 int nPrecachedFrames,
-														 bool usePower2Stride):
-								 						 TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
-														 TheoraAudioPacketQueue()
-{
-	mFormatContext = NULL;
-	mCodecContext = NULL;
-	mCodec = NULL;
-	mFrame = NULL;
-	mVideoStreamIndex = -1;
-}
-
-TheoraVideoClip_FFmpeg::~TheoraVideoClip_FFmpeg()
-{
-	unload();
-}
-
-void TheoraVideoClip_FFmpeg::load(TheoraDataSource* source)
-{
-	mVideoStreamIndex = -1;
-	mFrameNumber = 0;
-	AVDictionary* optionsDict = NULL;
-	
-	if (!ffmpegInitialised)
-	{
-#ifdef _FFMPEG_DEBUG
-		th_writelog("Initializing ffmpeg");
-#endif
-		th_writelog("avcodec version: " + str(avcodec_version()));
-		av_register_all();
-		av_log_set_level(AV_LOG_DEBUG);
-		av_log_set_callback(avlog_theoraplayer);
-		ffmpegInitialised = 1;
-		//show_codecs(0, 0, 0);
-	}
-	
-	mInputBuffer = (unsigned char*) av_malloc(READ_BUFFER_SIZE);
-	mAvioContext = avio_alloc_context(mInputBuffer, READ_BUFFER_SIZE, 0, source, &readFunction, NULL, &seekFunction);
-	
-#ifdef _FFMPEG_DEBUG
-	th_writelog(mName + ": avio context created");
-#endif
-
-	mFormatContext = avformat_alloc_context();
-#ifdef _FFMPEG_DEBUG
-	th_writelog(mName + ": avformat context created");
-#endif
-	mFormatContext->pb = mAvioContext;
-	
-	int err;
-	if ((err = avformat_open_input(&mFormatContext, "", NULL, NULL)) != 0)
-	{
-		th_writelog(mName + ": avformat input opening failed!");
-		th_writelog(mName + ": error_code: " + str(err));
-		return;
-	}
-	
-#ifdef _FFMPEG_DEBUG
-	th_writelog(mName + ": avformat input opened");
-#endif
-	
-	// Retrieve stream information
-	if (avformat_find_stream_info(mFormatContext, NULL) < 0)
-		return; // Couldn't find stream information
-	
-#ifdef _FFMPEG_DEBUG
-	th_writelog(mName + ": got stream info");
-#endif
-	
-	// Dump information about file onto standard error
-	//	av_dump_format(mFormatContext, 0, "", 0);
-	
-	// Find the first video stream
-	for (int i = 0; i < mFormatContext->nb_streams; ++i)
-	{
-		if(mFormatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
-		{
-			mVideoStreamIndex = i;
-			break;
-		}
-	}
-	if (mVideoStreamIndex == -1)
-		return; // Didn't find a video stream
-
-#ifdef _FFMPEG_DEBUG
-	th_writelog(mName + ": Found video stream at index " + str(mVideoStreamIndex));
-#endif
-
-	// Get a pointer to the codec context for the video stream
-	mCodecContext = mFormatContext->streams[mVideoStreamIndex]->codec;
-	
-	// Find the decoder for the video stream
-	mCodec = avcodec_find_decoder(mCodecContext->codec_id);
-	if (mCodec == NULL)
-	{
-		th_writelog("Unsupported codec!");
-		return; // Codec not found
-	}
-	// Open codec
-	if(avcodec_open2(mCodecContext, mCodec, &optionsDict) < 0)
-		return; // Could not open codec
-	
-#ifdef _FFMPEG_DEBUG
-	th_writelog(mName + ": Codec opened");
-#endif
-
-	
-	mFrame = avcodec_alloc_frame();
-	
-#ifdef _FFMPEG_DEBUG
-	th_writelog(mName + ": Frame allocated");
-#endif
-		
-	//AVRational rational = mCodecContext->time_base;
-
-	mFPS = 25; //TODOOOOOO!!!
-	
-	mWidth = mStride = mCodecContext->width;
-	mHeight = mCodecContext->height;
-	mFrameDuration = 1.0f / mFPS;
-	mDuration = mFormatContext->duration / AV_TIME_BASE;
-	
-	if (mFrameQueue == NULL) // todo - why is this set in the backend class? it should be set in the base class, check other backends as well
-	{
-		mFrameQueue = new TheoraFrameQueue(this);
-		mFrameQueue->setSize(mNumPrecachedFrames);
-	}
-}
-
-void TheoraVideoClip_FFmpeg::unload()
-{
-	if (mInputBuffer)
-	{
-//		av_free(mInputBuffer);
-		mInputBuffer = NULL;
-	}
-	if (mAvioContext)
-	{
-		av_free(mAvioContext);
-		mAvioContext = NULL;
-	}
-	if (mFrame)
-	{
-		av_free(mFrame);
-		mFrame = NULL;
-	}
-	if (mCodecContext)
-	{
-		avcodec_close(mCodecContext);
-		mCodecContext = NULL;
-	}
-	if (mFormatContext)
-	{
-		avformat_close_input(&mFormatContext);
-		mFormatContext = NULL;
-	}
-}
-
-bool TheoraVideoClip_FFmpeg::_readData()
-{
-	return 1;
-}
-
-bool TheoraVideoClip_FFmpeg::decodeNextFrame()
-{
-	TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
-	if (!frame) return 0;
-
-	AVPacket packet;
-	int frameFinished;
-	
-	while (av_read_frame(mFormatContext, &packet) >= 0)
-	{
-		if (packet.stream_index == mVideoStreamIndex)
-		{
-			avcodec_decode_video2(mCodecContext, mFrame, &frameFinished, &packet);
-			
-			if (frameFinished)
-			{
-				TheoraPixelTransform t;
-				memset(&t, 0, sizeof(TheoraPixelTransform));
-
-				t.y = mFrame->data[0]; t.yStride = mFrame->linesize[0];
-				t.u = mFrame->data[1]; t.uStride = mFrame->linesize[1];
-				t.v = mFrame->data[2]; t.vStride = mFrame->linesize[2];
-				
-				frame->decode(&t);
-				frame->mTimeToDisplay = mFrameNumber / mFPS;
-				frame->mIteration = mIteration;
-				frame->_setFrameNumber(mFrameNumber++);
-
-				av_free_packet(&packet);
-				break;
-			}
-		}
-		av_free_packet(&packet);
-	}
-	return 1;
-}
- 
-void TheoraVideoClip_FFmpeg::decodedAudioCheck()
-{
-	if (!mAudioInterface || mTimer->isPaused()) return;
-	
-	mAudioMutex->lock();
-	flushAudioPackets(mAudioInterface);
-	mAudioMutex->unlock();
-}
-
-float TheoraVideoClip_FFmpeg::decodeAudio()
-{
-	return -1;
-}
-
-void TheoraVideoClip_FFmpeg::doSeek()
-{
-
-}
-
-void TheoraVideoClip_FFmpeg::_restart()
-{
-
-}
-
-#endif
diff --git a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h b/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h
deleted file mode 100644
index 03f9a3d964..0000000000
--- a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#if defined(__FFMPEG) && !defined(_TheoraVideoClip_FFmpeg_h)
-#define _TheoraVideoClip_FFmpeg_h
-
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraVideoClip.h"
-
-struct AVFormatContext;
-struct AVCodecContext;
-struct AVCodec;
-struct AVFrame;
-struct AVIOContext;
-
-class TheoraVideoClip_FFmpeg : public TheoraVideoClip, public TheoraAudioPacketQueue
-{
-protected:
-	bool mLoaded;
-	
-	AVFormatContext* mFormatContext;
-	AVCodecContext* mCodecContext;
-	AVIOContext* mAvioContext;
-	AVCodec* mCodec;
-	AVFrame* mFrame;
-	unsigned char* mInputBuffer;
-	int mVideoStreamIndex;
-	int mFrameNumber;
-	
-	void unload();
-	void doSeek();
-public:
-	TheoraVideoClip_FFmpeg(TheoraDataSource* data_source,
-								 TheoraOutputMode output_mode,
-								 int nPrecachedFrames,
-								 bool usePower2Stride);
-	~TheoraVideoClip_FFmpeg();
-	
-	bool _readData();
-	bool decodeNextFrame();
-	void _restart();
-	void load(TheoraDataSource* source);
-	float decodeAudio();
-	void decodedAudioCheck();
-	std::string getDecoderName() { return "FFmpeg"; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp b/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp
deleted file mode 100644
index c4f070ec50..0000000000
--- a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp
+++ /dev/null
@@ -1,703 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef __THEORA
-#include <memory.h>
-#include <algorithm>
-#include "TheoraVideoManager.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraAudioInterface.h"
-#include "TheoraTimer.h"
-#include "TheoraDataSource.h"
-#include "TheoraUtil.h"
-#include "TheoraException.h"
-#include "TheoraVideoClip_Theora.h"
-#include "TheoraPixelTransform.h"
-
-TheoraVideoClip_Theora::TheoraVideoClip_Theora(TheoraDataSource* data_source,
-										TheoraOutputMode output_mode,
-										int nPrecachedFrames,
-										bool usePower2Stride):
-	TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride),
-	TheoraAudioPacketQueue()
-{
-	mInfo.TheoraDecoder = NULL;
-	mInfo.TheoraSetup = NULL;
-	mVorbisStreams = mTheoraStreams = 0;
-	mReadAudioSamples = 0;
-	mLastDecodedFrameNumber = 0;
-}
-
-TheoraVideoClip_Theora::~TheoraVideoClip_Theora()
-{
-	if (mInfo.TheoraDecoder)
-	{
-		th_decode_free(mInfo.TheoraDecoder);
-		th_setup_free(mInfo.TheoraSetup);
-
-		if (mAudioInterface)
-		{
-			vorbis_dsp_clear(&mInfo.VorbisDSPState);
-			vorbis_block_clear(&mInfo.VorbisBlock);
-		}
-
-		ogg_stream_clear(&mInfo.TheoraStreamState);
-		th_comment_clear(&mInfo.TheoraComment);
-		th_info_clear(&mInfo.TheoraInfo);
-		
-		ogg_stream_clear(&mInfo.VorbisStreamState);
-		vorbis_comment_clear(&mInfo.VorbisComment);
-		vorbis_info_clear(&mInfo.VorbisInfo);
-		
-		ogg_sync_clear(&mInfo.OggSyncState);
-	}
-}
-
-bool TheoraVideoClip_Theora::_readData()
-{
-	int audio_eos = 0, serno;
-	float audio_time = 0;
-	float time = mTimer->getTime();
-	if (mRestarted) time = 0;
-	
-	for (;;)
-	{
-		char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
-		int bytes_read = mStream->read(buffer, 4096);
-		ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-		
-		if (bytes_read < 4096)
-		{
-			if (bytes_read == 0)
-			{
-				if (!mAutoRestart) mEndOfFile = true;
-				return 0;
-			}
-		}
-		// when we fill the stream with enough pages, it'll start spitting out packets
-		// which contain keyframes, delta frames or audio data
-		while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
-		{
-			serno = ogg_page_serialno(&mInfo.OggPage);
-			if (serno == mInfo.TheoraStreamState.serialno) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
-			if (mAudioInterface && serno == mInfo.VorbisStreamState.serialno)
-			{
-				ogg_int64_t g = ogg_page_granulepos(&mInfo.OggPage);
-				audio_time = (float) vorbis_granule_time(&mInfo.VorbisDSPState, g);
-				audio_eos = ogg_page_eos(&mInfo.OggPage);
-				ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
-			}
-		}
-		if (!(mAudioInterface && !audio_eos && audio_time < time + 1.0f))
-			break;
-	}
-	return 1;
-}
-
-bool TheoraVideoClip_Theora::decodeNextFrame()
-{
-	if (mEndOfFile) return 0;
-	
-	TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame();
-	if (!frame) return 0; // max number of precached frames reached
-	bool should_restart = 0;
-	ogg_packet opTheora;
-	ogg_int64_t granulePos;
-	th_ycbcr_buffer buff;
-    int ret, nAttempts;
-	for (;;)
-	{
-        // ogg_stream_packetout can return -1 and the official docs suggest to do subsequent calls until it succeeds
-        // because the data is out of sync. still will limit the number of attempts just in case
-        for (ret = -1, nAttempts = 0; ret < 0 && nAttempts < 100; nAttempts++)
-        {
-            ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora);
-        }
-		
-		if (ret > 0)
-		{
-			int status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos);
-            if (status != 0 && status != TH_DUPFRAME) continue; // 0 means success
-
-			float time = (float) th_granule_time(mInfo.TheoraDecoder, granulePos);
-			unsigned long frame_number = (unsigned long) th_granule_frame(mInfo.TheoraDecoder, granulePos);
-			
-			if (time < mTimer->getTime() && !mRestarted && frame_number % 16 != 0)
-			{
-				// %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand.
-#ifdef _DEBUG
-				th_writelog(mName + ": pre-dropped frame " + str((int) frame_number));
-#endif
-				++mNumDroppedFrames;
-				continue; // drop frame
-			}
-			frame->mTimeToDisplay = time - mFrameDuration;
-			frame->mIteration     = mIteration;
-			frame->_setFrameNumber(frame_number);
-			mLastDecodedFrameNumber = frame_number;
-			th_decode_ycbcr_out(mInfo.TheoraDecoder, buff);
-			TheoraPixelTransform t;
-			memset(&t, 0, sizeof(TheoraPixelTransform));
-			
-			t.y = buff[0].data; t.yStride = buff[0].stride;
-			t.u = buff[1].data; t.uStride = buff[1].stride;
-			t.v = buff[2].data; t.vStride = buff[2].stride;
-			frame->decode(&t);
-			break;
-		}
-		else
-		{
-			if (!_readData())
-			{
-				frame->mInUse = 0;
-				should_restart = mAutoRestart;
-				break;
-			}
-		}
-	}
-	
-	if (mAudioInterface != NULL)
-	{
-		mAudioMutex->lock();
-		decodeAudio();
-		mAudioMutex->unlock();
-	}
-	if (should_restart)
-    {
-        ++mIteration;
-		_restart();
-	}
-	return 1;
-}
-
-void TheoraVideoClip_Theora::_restart()
-{
-	bool paused = mTimer->isPaused();
-	if (!paused) mTimer->pause();
-	long granule=0;
-	th_decode_ctl(mInfo.TheoraDecoder,TH_DECCTL_SET_GRANPOS,&granule,sizeof(granule));
-	th_decode_free(mInfo.TheoraDecoder);
-	mInfo.TheoraDecoder=th_decode_alloc(&mInfo.TheoraInfo,mInfo.TheoraSetup);
-	ogg_stream_reset(&mInfo.TheoraStreamState);
-	if (mAudioInterface)
-	{
-		// empty the DSP buffer
-		//float **pcm;
-		//int len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState,&pcm);
-		//if (len) vorbis_synthesis_read(&mInfo.VorbisDSPState,len);
-		ogg_packet opVorbis;
-		mReadAudioSamples = 0;
-		while (ogg_stream_packetout(&mInfo.VorbisStreamState,&opVorbis) > 0)
-		{
-			if (vorbis_synthesis(&mInfo.VorbisBlock,&opVorbis) == 0)
-				vorbis_synthesis_blockin(&mInfo.VorbisDSPState,&mInfo.VorbisBlock);
-		}
-		ogg_stream_reset(&mInfo.VorbisStreamState);
-	}
-	
-	ogg_sync_reset(&mInfo.OggSyncState);
-	mStream->seek(0);
-	ogg_int64_t granulePos = 0;
-	th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granule));
-	
-	mEndOfFile = false;
-	
-	mRestarted = 1;
-	
-	if (!paused) mTimer->play();
-}
-
-void TheoraVideoClip_Theora::load(TheoraDataSource* source)
-{
-#ifdef _DEBUG
-	th_writelog("-----");
-#endif
-	mStream = source;
-	readTheoraVorbisHeaders();
-	
-	mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo,mInfo.TheoraSetup);
-	
-	mWidth = mInfo.TheoraInfo.frame_width;
-	mHeight = mInfo.TheoraInfo.frame_height;
-    mSubFrameWidth	 = mInfo.TheoraInfo.pic_width;
-    mSubFrameHeight	 = mInfo.TheoraInfo.pic_height;
-    mSubFrameOffsetX = mInfo.TheoraInfo.pic_x;
-    mSubFrameOffsetY = mInfo.TheoraInfo.pic_y;
-    mStride = (mStride == 1) ? mStride = _nextPow2(getWidth()) : getWidth();
-	mFPS = mInfo.TheoraInfo.fps_numerator / (float) mInfo.TheoraInfo.fps_denominator;
-	
-#ifdef _DEBUG
-	th_writelog("width: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS()));
-#endif
-	mFrameQueue = new TheoraFrameQueue(this);
-	mFrameQueue->setSize(mNumPrecachedFrames);
-	// find out the duration of the file by seeking to the end
-	// having ogg decode pages, extract the granule pos from
-	// the last theora page and seek back to beginning of the file
-	long streamSize = mStream->size(), seekPos;
-	for (int i = 1; i <= 50; ++i)
-	{
-		ogg_sync_reset(&mInfo.OggSyncState);
-		seekPos = streamSize - 4096 * i;
-		if (seekPos < 0) seekPos = 0;
-		mStream->seek(seekPos);
-		
-		char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096 * i);
-		int bytes_read = mStream->read(buffer, 4096 * i);
-		ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-		ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
-		
-		for (;;)
-		{
-			int ret = ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage);
-			if (ret == 0) break;
-			// if page is not a theora page, skip it
-			if (ogg_page_serialno(&mInfo.OggPage) != mInfo.TheoraStreamState.serialno) continue;
-			
-			ogg_int64_t granule = ogg_page_granulepos(&mInfo.OggPage);
-			if (granule >= 0)
-			{
-				mNumFrames = (int) th_granule_frame(mInfo.TheoraDecoder, granule) + 1;
-			}
-			else if (mNumFrames > 0)
-				++mNumFrames; // append delta frames at the end to get the exact numbe
-		}
-		if (mNumFrames > 0 || streamSize - 4096 * i < 0) break;
-		
-	}
-	if (mNumFrames < 0)
-		th_writelog("unable to determine file duration!");
-	else
-	{
-		mDuration = mNumFrames / mFPS;
-#ifdef _DEBUG
-		th_writelog("duration: " + strf(mDuration) + " seconds");
-#endif
-	}
-	// restore to beginning of stream.
-	ogg_sync_reset(&mInfo.OggSyncState);
-	mStream->seek(0);
-	
-	if (mVorbisStreams) // if there is no audio interface factory defined, even though the video
-		// clip might have audio, it will be ignored
-	{
-		vorbis_synthesis_init(&mInfo.VorbisDSPState, &mInfo.VorbisInfo);
-		vorbis_block_init(&mInfo.VorbisDSPState, &mInfo.VorbisBlock);
-		mNumAudioChannels = mInfo.VorbisInfo.channels;
-		mAudioFrequency = (int) mInfo.VorbisInfo.rate;
-
-		// create an audio interface instance if available
-		TheoraAudioInterfaceFactory* audio_factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory();
-		printf("**** audio factory is %p\n", audio_factory);
-		if (audio_factory) setAudioInterface(audio_factory->createInstance(this, mNumAudioChannels, mAudioFrequency));
-	}
-	
-	mFrameDuration = 1.0f / getFPS();
-#ifdef _DEBUG
-	th_writelog("-----");
-#endif
-}
-
-void TheoraVideoClip_Theora::readTheoraVorbisHeaders()
-{
-	ogg_packet tempOggPacket;
-	bool done = false;
-	bool decode_audio=TheoraVideoManager::getSingleton().getAudioInterfaceFactory() != NULL;
-	//init Vorbis/Theora Layer
-	//Ensure all structures get cleared out.
-	memset(&mInfo.OggSyncState, 0, sizeof(ogg_sync_state));
-	memset(&mInfo.OggPage, 0, sizeof(ogg_page));
-	memset(&mInfo.VorbisStreamState, 0, sizeof(ogg_stream_state));
-	memset(&mInfo.TheoraStreamState, 0, sizeof(ogg_stream_state));
-	memset(&mInfo.TheoraInfo, 0, sizeof(th_info));
-	memset(&mInfo.TheoraComment, 0, sizeof(th_comment));
-	memset(&mInfo.VorbisInfo, 0, sizeof(vorbis_info));
-	memset(&mInfo.VorbisDSPState, 0, sizeof(vorbis_dsp_state));
-	memset(&mInfo.VorbisBlock, 0, sizeof(vorbis_block));
-	memset(&mInfo.VorbisComment, 0, sizeof(vorbis_comment));
-	
-	ogg_sync_init(&mInfo.OggSyncState);
-	th_comment_init(&mInfo.TheoraComment);
-	th_info_init(&mInfo.TheoraInfo);
-	vorbis_info_init(&mInfo.VorbisInfo);
-	vorbis_comment_init(&mInfo.VorbisComment);
-	
-	while (!done)
-	{
-		char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
-		int bytes_read = mStream->read(buffer, 4096);
-		ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-		
-		if (bytes_read == 0)
-			break;
-		
-		while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
-		{
-			ogg_stream_state OggStateTest;
-			
-			//is this an initial header? If not, stop
-			if (!ogg_page_bos(&mInfo.OggPage))
-			{
-				//This is done blindly, because stream only accept themselves
-				if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
-				if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
-				
-				done=true;
-				break;
-			}
-			
-			ogg_stream_init(&OggStateTest, ogg_page_serialno(&mInfo.OggPage));
-			ogg_stream_pagein(&OggStateTest, &mInfo.OggPage);
-			ogg_stream_packetout(&OggStateTest, &tempOggPacket);
-			
-			//identify the codec
-			int ret;
-			if (!mTheoraStreams)
-			{
-				ret = th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket);
-				
-				if (ret > 0)
-				{
-					//This is the Theora Header
-					memcpy(&mInfo.TheoraStreamState, &OggStateTest, sizeof(OggStateTest));
-					mTheoraStreams = 1;
-					continue;
-				}
-			}
-			if (decode_audio && !mVorbisStreams &&
-				vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment, &tempOggPacket) >=0)
-			{
-				//This is vorbis header
-				memcpy(&mInfo.VorbisStreamState, &OggStateTest, sizeof(OggStateTest));
-				mVorbisStreams = 1;
-				continue;
-			}
-			//Hmm. I guess it's not a header we support, so erase it
-			ogg_stream_clear(&OggStateTest);
-		}
-	}
-	
-	while ((mTheoraStreams && (mTheoraStreams < 3)) ||
-		   (mVorbisStreams && (mVorbisStreams < 3)))
-	{
-		//Check 2nd'dary headers... Theora First
-		int iSuccess;
-		while (mTheoraStreams && mTheoraStreams < 3 &&
-			  (iSuccess = ogg_stream_packetout(&mInfo.TheoraStreamState, &tempOggPacket)))
-		{
-			if (iSuccess < 0)
-				throw TheoraGenericException("Error parsing Theora stream headers.");
-			if (!th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket))
-				throw TheoraGenericException("invalid theora stream");
-			
-			++mTheoraStreams;
-		} //end while looking for more theora headers
-		
-		//look 2nd vorbis header packets
-		while (mVorbisStreams < 3 && (iSuccess = ogg_stream_packetout(&mInfo.VorbisStreamState, &tempOggPacket)))
-		{
-			if (iSuccess < 0)
-				throw TheoraGenericException("Error parsing vorbis stream headers");
-			
-			if (vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment,&tempOggPacket))
-				throw TheoraGenericException("invalid stream");
-			
-			++mVorbisStreams;
-		} //end while looking for more vorbis headers
-		
-		//Not finished with Headers, get some more file data
-		if (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0)
-		{
-			if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage);
-			if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage);
-		}
-		else
-		{
-			char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
-			int bytes_read = mStream->read(buffer, 4096);
-			ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-			
-			if (bytes_read == 0)
-				throw TheoraGenericException("End of file found prematurely");
-		}
-	} //end while looking for all headers
-	//	writelog("Vorbis Headers: " + str(mVorbisHeaders) + " Theora Headers : " + str(mTheoraHeaders));
-}
-
-void TheoraVideoClip_Theora::decodedAudioCheck()
-{
-	if (!mAudioInterface || mTimer->isPaused()) return;
-
-	mAudioMutex->lock();
-	flushAudioPackets(mAudioInterface);
-	mAudioMutex->unlock();
-}
-
-float TheoraVideoClip_Theora::decodeAudio()
-{
-	if (mRestarted) return -1;
-	
-	ogg_packet opVorbis;
-	float **pcm;
-	int len = 0;
-	float timestamp = -1;
-	bool read_past_timestamp = 0;
-	
-	float factor = 1.0f / mAudioFrequency;
-	float videoTime = (float) mLastDecodedFrameNumber / mFPS;
-	float min = mFrameQueue->getSize() / mFPS + 1.0f;
-
-	for (;;)
-	{
-		len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState, &pcm);
-		if (len == 0)
-		{
-			if (ogg_stream_packetout(&mInfo.VorbisStreamState, &opVorbis) > 0)
-			{
-				if (vorbis_synthesis(&mInfo.VorbisBlock, &opVorbis) == 0)
-				{
-					if (timestamp < 0 && opVorbis.granulepos >= 0)
-					{
-						timestamp = (float) vorbis_granule_time(&mInfo.VorbisDSPState, opVorbis.granulepos);
-					}
-					else if (timestamp >= 0) read_past_timestamp = 1;
-					vorbis_synthesis_blockin(&mInfo.VorbisDSPState, &mInfo.VorbisBlock);
-				}
-				continue;
-			}
-			else
-			{
-				float audioTime = mReadAudioSamples * factor;
-				// always buffer up of audio ahead of the frames
-				if (audioTime - videoTime < min)
-				{
-					if (!_readData()) break;
-				}
-				else
-					break;
-			}
-		}
-		addAudioPacket(pcm, len, mAudioGain);
-		mReadAudioSamples += len;
-		if (read_past_timestamp) timestamp += (float) len / mInfo.VorbisInfo.rate;
-		vorbis_synthesis_read(&mInfo.VorbisDSPState, len); // tell vorbis we read a number of samples
-	}
-	return timestamp;
-}
-
-long TheoraVideoClip_Theora::seekPage(long targetFrame, bool return_keyframe)
-{
-	int i,seek_min = 0, seek_max = (int) mStream->size();
-	long frame;
-	ogg_int64_t granule = 0;
-	
-	if (targetFrame == 0) mStream->seek(0);
-	for (i = (targetFrame == 0) ? 100 : 0; i < 100; ++i)
-	{
-		ogg_sync_reset(&mInfo.OggSyncState);
-		mStream->seek((seek_min + seek_max) / 2); // do a binary search
-		memset(&mInfo.OggPage, 0, sizeof(ogg_page));
-		ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
-		
-		for (;i < 1000;)
-		{
-			int ret = ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage);
-			if (ret == 1)
-			{
-				int serno = ogg_page_serialno(&mInfo.OggPage);
-				if (serno == mInfo.TheoraStreamState.serialno)
-				{
-					granule = ogg_page_granulepos(&mInfo.OggPage);
-					if (granule >= 0)
-					{
-						frame = (long) th_granule_frame(mInfo.TheoraDecoder, granule);
-						if (frame < targetFrame && targetFrame - frame < 10)
-						{
-							// we're close enough, let's break this.
-							i = 1000;
-							break;
-						}
-						// we're not close enough, let's shorten the borders of the binary search
-						if (targetFrame - 1 > frame) seek_min = (seek_min + seek_max) / 2;
-						else				         seek_max = (seek_min + seek_max) / 2;
-						break;
-					}
-				}
-			}
-			else
-			{
-				char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096);
-				int bytes_read = mStream->read(buffer, 4096);
-				if (bytes_read == 0) break;
-				ogg_sync_wrote(&mInfo.OggSyncState, bytes_read);
-			}
-		}
-	}
-	if (return_keyframe) return (long) (granule >> mInfo.TheoraInfo.keyframe_granule_shift);
-	
-	ogg_sync_reset(&mInfo.OggSyncState);
-	memset(&mInfo.OggPage, 0, sizeof(ogg_page));
-	ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage);
-	if (targetFrame == 0) return -1;
-	mStream->seek((seek_min + seek_max) / 2); // do a binary search
-	return -1;
-}
-
-void TheoraVideoClip_Theora::doSeek()
-{
-#if _DEBUG
-	th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame));
-#endif
-	int frame;
-	float time = mSeekFrame / getFPS();
-	mTimer->seek(time);
-	bool paused = mTimer->isPaused();
-	if (!paused) mTimer->pause(); // pause until seeking is done
-	
-	mEndOfFile = false;
-	mRestarted = false;
-	
-	resetFrameQueue();
-	// reset the video decoder.
-	ogg_stream_reset(&mInfo.TheoraStreamState);
-	th_decode_free(mInfo.TheoraDecoder);
-	mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo, mInfo.TheoraSetup);
-	
-	if (mAudioInterface)
-	{
-		mAudioMutex->lock();
-		ogg_stream_reset(&mInfo.VorbisStreamState);
-		vorbis_synthesis_restart(&mInfo.VorbisDSPState);
-		destroyAllAudioPackets();
-	}
-	// first seek to desired frame, then figure out the location of the
-	// previous keyframe and seek to it.
-	// then by setting the correct time, the decoder will skip N frames untill
-	// we get the frame we want.
-	frame = (int) seekPage(mSeekFrame, 1); // find the keyframe nearest to the target frame
-#ifdef _DEBUG
-	//		th_writelog(mName + " [seek]: nearest keyframe for frame " + str(mSeekFrame) + " is frame: " + str(frame));
-#endif
-	seekPage(std::max(0, frame - 1), 0);
-	
-	ogg_packet opTheora;
-	ogg_int64_t granulePos;
-	bool granule_set = 0;
-	if (frame <= 1)
-	{
-		if (mInfo.TheoraInfo.version_major == 3 && mInfo.TheoraInfo.version_minor == 2 && mInfo.TheoraInfo.version_subminor == 0)
-			granulePos = 0;
-		else
-			granulePos = 1; // because of difference in granule interpretation in theora streams 3.2.0 and newer ones
-		th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granulePos));
-		granule_set = 1;
-	}
-	
-	// now that we've found the keyframe that preceeds our desired frame, lets keep on decoding frames until we
-	// reach our target frame.
-	
-    int status, ret;
-	for (;mSeekFrame != 0;)
-	{
-		ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora);
-		if (ret > 0)
-		{
-			if (!granule_set)
-			{
-				// theora decoder requires to set the granule pos after seek to be able to determine the current frame
-				if (opTheora.granulepos >= 0)
-				{
-					th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &opTheora.granulepos, sizeof(opTheora.granulepos));
-					granule_set = 1;
-				}
-				else continue; // ignore prev delta frames until we hit a keyframe
-			}
-			status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos);
-            if (status != 0 && status != TH_DUPFRAME) continue;
-			frame = (int) th_granule_frame(mInfo.TheoraDecoder, granulePos);
-			if (frame >= mSeekFrame - 1) break;
-		}
-		else
-		{
-			if (!_readData())
-			{
-				th_writelog(mName + " [seek]: fineseeking failed, _readData failed!");
-				if (mAudioInterface) mAudioMutex->unlock();
-				return;
-			}
-		}
-	}
-#ifdef _DEBUG
-	//	th_writelog(mName + " [seek]: fineseeked to frame " + str(frame + 1) + ", requested: " + str(mSeekFrame));
-#endif
-	if (mAudioInterface)
-	{
-		// read audio data until we reach a timestamp. this usually takes only one iteration, but just in case let's
-		// wrap it in a loop
-		float timestamp;
-		for (;;)
-		{
-			timestamp = decodeAudio();
-			if (timestamp >= 0) break;
-			else _readData();
-		}
-		float rate = (float) mAudioFrequency * mNumAudioChannels;
-		float queued_time = getAudioPacketQueueLength();
-		// at this point there are only 2 possibilities: either we have too much packets and we have to delete
-		// the first N ones, or we don't have enough, so let's fill the gap with silence.
- 		if (time > timestamp - queued_time)
-		{
-			while (mTheoraAudioPacketQueue != NULL)
-			{
-				if (time > timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate)
-				{
-					queued_time -= mTheoraAudioPacketQueue->numSamples / rate;
-					destroyAudioPacket(popAudioPacket());
-				}
-				else
-				{
-					int n_trim = (int) ((timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate - time) * rate);
-					if (mTheoraAudioPacketQueue->numSamples - n_trim <= 0)
-						destroyAudioPacket(popAudioPacket()); // if there's no data to be left, just destroy it
-					else
-					{
-						for (int i = n_trim, j = 0; i < mTheoraAudioPacketQueue->numSamples; ++i, ++j)
-							mTheoraAudioPacketQueue->pcm[j] = mTheoraAudioPacketQueue->pcm[i];
-						mTheoraAudioPacketQueue->numSamples -= n_trim;
-					}
-					break;
-				}
-			}
-		}
-		else
-		{
-			// expand the first packet with silence.
-			if (mTheoraAudioPacketQueue) // just in case!
-			{
-				int i, j, nmissing = (int) ((timestamp - queued_time - time) * rate);
-				if (nmissing > 0)
-				{
-					float* samples = new float[nmissing + mTheoraAudioPacketQueue->numSamples];
-					for (i = 0; i < nmissing; ++i) samples[i] = 0;
-					for (j = 0; i < nmissing + mTheoraAudioPacketQueue->numSamples; ++i, ++j)
-						samples[i] = mTheoraAudioPacketQueue->pcm[j];
-					delete [] mTheoraAudioPacketQueue->pcm;
-					mTheoraAudioPacketQueue->pcm = samples;
-				}
-			}
-		}
-		mLastDecodedFrameNumber = mSeekFrame;
-		mReadAudioSamples = (unsigned int) (timestamp * mAudioFrequency);
-		
-		mAudioMutex->unlock();
-	}
-	if (!paused) mTimer->play();
-	mSeekFrame = -1;
-}
-#endif
diff --git a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h b/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h
deleted file mode 100644
index c64c183029..0000000000
--- a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#if defined(__THEORA) && !defined(_TheoraVideoClip_Theora_h)
-#define _TheoraVideoClip_Theora_h
-
-#include <ogg/ogg.h>
-#include <vorbis/vorbisfile.h>
-#include <theora/theoradec.h>
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraVideoClip.h"
-
-struct TheoraInfoStruct
-{
-	// ogg/vorbis/theora variables
-	ogg_sync_state   OggSyncState;
-	ogg_page         OggPage;
-	ogg_stream_state VorbisStreamState;
-	ogg_stream_state TheoraStreamState;
-	//Theora State
-	th_info        TheoraInfo;
-	th_comment     TheoraComment;
-	th_setup_info* TheoraSetup;
-	th_dec_ctx*    TheoraDecoder;
-	//Vorbis State
-	vorbis_info      VorbisInfo;
-	vorbis_dsp_state VorbisDSPState;
-	vorbis_block     VorbisBlock;
-	vorbis_comment   VorbisComment;
-};
-
-class TheoraVideoClip_Theora : public TheoraVideoClip, public TheoraAudioPacketQueue
-{
-protected:
-	TheoraInfoStruct mInfo; // a pointer is used to avoid having to include theora & vorbis headers
-	int mTheoraStreams, mVorbisStreams;	// Keeps track of Theora and Vorbis Streams
-
-	long seekPage(long targetFrame, bool return_keyframe);
-	void doSeek();
-	void readTheoraVorbisHeaders();
-	unsigned int mReadAudioSamples;
-	unsigned long mLastDecodedFrameNumber;
-public:
-	TheoraVideoClip_Theora(TheoraDataSource* data_source,
-						   TheoraOutputMode output_mode,
-						   int nPrecachedFrames,
-						   bool usePower2Stride);
-	~TheoraVideoClip_Theora();
-
-	bool _readData();
-	bool decodeNextFrame();
-	void _restart();
-	void load(TheoraDataSource* source);
-	float decodeAudio();
-	void decodedAudioCheck();
-	std::string getDecoderName() { return "Theora"; }
-};
-
-#endif
diff --git a/drivers/theoraplayer/src/TheoraAsync.cpp b/drivers/theoraplayer/src/TheoraAsync.cpp
deleted file mode 100644
index cc3b7a4bf5..0000000000
--- a/drivers/theoraplayer/src/TheoraAsync.cpp
+++ /dev/null
@@ -1,253 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifdef _WIN32
-#include <windows.h>
-#else
-#include <unistd.h>
-#include <pthread.h>
-#endif
-
-#include "TheoraAsync.h"
-#include "TheoraUtil.h"
-
-#ifdef _WINRT
-#include <wrl.h>
-#endif
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Mutex
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-TheoraMutex::TheoraMutex()
-{
-#ifdef _WIN32
-#ifndef _WINRT // WinXP does not have CreateTheoraMutexEx()
-	mHandle = CreateMutex(0, 0, 0);
-#else
-	mHandle = CreateMutexEx(NULL, NULL, 0, SYNCHRONIZE);
-#endif
-#else
-	mHandle = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
-	pthread_mutex_init((pthread_mutex_t*)mHandle, 0);
-#endif
-}
-
-TheoraMutex::~TheoraMutex()
-{
-#ifdef _WIN32
-	CloseHandle(mHandle);
-#else
-	pthread_mutex_destroy((pthread_mutex_t*)mHandle);
-	free((pthread_mutex_t*)mHandle);
-	mHandle = NULL;
-#endif
-}
-
-void TheoraMutex::lock()
-{
-#ifdef _WIN32
-	WaitForSingleObjectEx(mHandle, INFINITE, FALSE);
-#else
-	pthread_mutex_lock((pthread_mutex_t*)mHandle);
-#endif
-}
-
-void TheoraMutex::unlock()
-{
-#ifdef _WIN32
-	ReleaseMutex(mHandle);
-#else
-	pthread_mutex_unlock((pthread_mutex_t*)mHandle);
-#endif
-}
-	
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Thread
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-#ifdef _WINRT
-using namespace Windows::Foundation;
-using namespace Windows::System::Threading;
-#endif
-
-#ifdef _WIN32
-unsigned long WINAPI theoraAsyncCall(void* param)
-#else
-void* theoraAsyncCall(void* param)
-#endif
-{
-	TheoraThread* t = (TheoraThread*)param;
-	t->execute();
-#ifdef _WIN32
-	return 0;
-#else
-	pthread_exit(NULL);
-	return NULL;
-#endif
-}
-
-#ifdef _WINRT
-struct TheoraAsyncActionWrapper
-{
-public:
-	IAsyncAction^ mAsyncAction;
-	TheoraAsyncActionWrapper(IAsyncAction^ asyncAction)
-	{
-		mAsyncAction = asyncAction;
-	}
-};
-#endif
-	
-TheoraThread::TheoraThread() : mRunning(false), mId(0)
-{
-#ifndef _WIN32
-	mId = (pthread_t*)malloc(sizeof(pthread_t));
-#endif
-}
-
-TheoraThread::~TheoraThread()
-{
-	if (mRunning)
-	{
-		stop();
-	}
-	if (mId != NULL)
-	{
-#ifdef _WIN32
-#ifndef _WINRT
-		CloseHandle(mId);
-#else
-		delete mId;
-#endif
-#else
-		free((pthread_t*)mId);
-#endif
-		mId = NULL;
-	}
-}
-
-void TheoraThread::start()
-{
-	mRunning = true;
-#ifdef _WIN32
-#ifndef _WINRT
-	mId = CreateThread(0, 0, &theoraAsyncCall, this, 0, 0);
-#else
-	mId = new TheoraAsyncActionWrapper(ThreadPool::RunAsync(
-		ref new WorkItemHandler([&](IAsyncAction^ work_item)
-		{
-			execute();
-		}),
-		WorkItemPriority::Normal, WorkItemOptions::TimeSliced));
-#endif
-#else
-	pthread_create((pthread_t*)mId, NULL, &theoraAsyncCall, this);
-#endif
-}
-
-bool TheoraThread::isRunning()
-{
-	bool ret;
-	mRunningMutex.lock();
-	ret = mRunning;
-	mRunningMutex.unlock();
-	
-	return ret;
-}
-
-void TheoraThread::join()
-{
-	mRunningMutex.lock();
-	mRunning = false;
-	mRunningMutex.unlock();
-#ifdef _WIN32
-#ifndef _WINRT
-	WaitForSingleObject(mId, INFINITE);
-	if (mId != NULL)
-	{
-		CloseHandle(mId);
-		mId = NULL;
-	}
-#else
-	IAsyncAction^ action = ((TheoraAsyncActionWrapper*)mId)->mAsyncAction;
-	int i = 0;
-	while (action->Status != AsyncStatus::Completed &&
-		action->Status != AsyncStatus::Canceled &&
-		action->Status != AsyncStatus::Error &&
-		i < 100)
-	{
-		_psleep(50);
-		++i;
-	}
-	if (i >= 100)
-	{
-		i = 0;
-		action->Cancel();
-		while (action->Status != AsyncStatus::Completed &&
-			action->Status != AsyncStatus::Canceled &&
-			action->Status != AsyncStatus::Error &&
-			i < 100)
-		{
-			_psleep(50);
-			++i;
-		}
-	}
-#endif
-#else
-	pthread_join(*((pthread_t*)mId), 0);
-#endif
-}
-	
-void TheoraThread::resume()
-{
-#ifdef _WIN32
-#ifndef _WINRT
-	ResumeThread(mId);
-#else
-	// not available in WinRT
-#endif
-#endif
-}
-	
-void TheoraThread::pause()
-{
-#ifdef _WIN32
-#ifndef _WINRT
-	SuspendThread(mId);
-#else
-	// not available in WinRT
-#endif
-#endif
-}
-	
-void TheoraThread::stop()
-{
-	if (mRunning)
-	{
-		mRunningMutex.lock();
-		mRunning = false;
-		mRunningMutex.unlock();
-#ifdef _WIN32
-#ifndef _WINRT
-		TerminateThread(mId, 0);
-#else
-		((TheoraAsyncActionWrapper*)mId)->mAsyncAction->Cancel();
-#endif
-#elif defined(_ANDROID)
-		pthread_kill(*((pthread_t*)mId), 0);
-#else
-		pthread_cancel(*((pthread_t*)mId));
-#endif
-	}
-}
-	
diff --git a/drivers/theoraplayer/src/TheoraAudioInterface.cpp b/drivers/theoraplayer/src/TheoraAudioInterface.cpp
deleted file mode 100644
index a265cb57b5..0000000000
--- a/drivers/theoraplayer/src/TheoraAudioInterface.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraAudioInterface.h"
-
-TheoraAudioInterface::TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq)
-{
-	mFreq = freq;
-	mNumChannels = nChannels;
-	mClip = owner;
-}
-
-TheoraAudioInterface::~TheoraAudioInterface()
-{
-	
-}
diff --git a/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp b/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp
deleted file mode 100644
index be5e1018f9..0000000000
--- a/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <stdlib.h>
-#include "TheoraAudioPacketQueue.h"
-#include "TheoraAudioInterface.h"
-
-TheoraAudioPacketQueue::TheoraAudioPacketQueue()
-{
-	mTheoraAudioPacketQueue = NULL;
-}
-
-TheoraAudioPacketQueue::~TheoraAudioPacketQueue()
-{
-	destroyAllAudioPackets();
-}
-
-float TheoraAudioPacketQueue::getAudioPacketQueueLength()
-{
-	float len = 0;
-	for (TheoraAudioPacket* p = mTheoraAudioPacketQueue; p != NULL; p = p->next)
-		len += p->numSamples;
-	
-	return len / (mAudioFrequency * mNumAudioChannels);
-}
-
-void TheoraAudioPacketQueue::_addAudioPacket(float* data, int numSamples)
-{
-	TheoraAudioPacket* packet = new TheoraAudioPacket;
-	packet->pcm = data;
-	packet->numSamples = numSamples;
-	packet->next = NULL;
-
-
-	if (mTheoraAudioPacketQueue == NULL) mTheoraAudioPacketQueue = packet;
-	else
-	{
-		TheoraAudioPacket* last = mTheoraAudioPacketQueue;
-		for (TheoraAudioPacket* p = last; p != NULL; p = p->next)
-			last = p;
-		last->next = packet;
-	}
-}
-
-void TheoraAudioPacketQueue::addAudioPacket(float** buffer, int numSamples, float gain)
-{
-	float* data = new float[numSamples * mNumAudioChannels];
-	float* dataptr = data;
-	int i;
-	unsigned int j;
-	
-	if (gain < 1.0f)
-	{
-		// apply gain, let's attenuate the samples
-		for (i = 0; i < numSamples; ++i)
-			for (j = 0; j < mNumAudioChannels; j++, ++dataptr)
-				*dataptr = buffer[i][j] * gain;
-	}
-	else
-	{
-		// do a simple copy, faster then the above method, when gain is 1.0f
-		for (i = 0; i < numSamples; ++i)
-			for (j = 0; j < mNumAudioChannels; j++, ++dataptr)
-				*dataptr = buffer[j][i];
-	}
-		
-	_addAudioPacket(data, numSamples * mNumAudioChannels);
-}
-
-void TheoraAudioPacketQueue::addAudioPacket(float* buffer, int numSamples, float gain)
-{
-	float* data = new float[numSamples * mNumAudioChannels];
-	float* dataptr = data;
-	int i, numFloats = numSamples * mNumAudioChannels;
-	
-	if (gain < 1.0f)
-	{
-		// apply gain, let's attenuate the samples
-		for (i = 0; i < numFloats; ++i, dataptr++)
-			*dataptr = buffer[i] * gain;
-	}
-	else
-	{
-		// do a simple copy, faster then the above method, when gain is 1.0f
-		for (i = 0; i < numFloats; ++i, dataptr++)
-			*dataptr = buffer[i];
-	}
-	
-	_addAudioPacket(data, numFloats);
-}
-
-TheoraAudioPacket* TheoraAudioPacketQueue::popAudioPacket()
-{
-	if (mTheoraAudioPacketQueue == NULL) return NULL;
-	TheoraAudioPacket* p = mTheoraAudioPacketQueue;
-	mTheoraAudioPacketQueue = mTheoraAudioPacketQueue->next;
-	return p;
-}
-
-void TheoraAudioPacketQueue::destroyAudioPacket(TheoraAudioPacket* p)
-{
-	if (p == NULL) return;
-	delete [] p->pcm;
-	delete p;
-}
-
-void TheoraAudioPacketQueue::destroyAllAudioPackets()
-{
-	for (TheoraAudioPacket* p = popAudioPacket(); p != NULL; p = popAudioPacket())
-		destroyAudioPacket(p);
-}
-
-void TheoraAudioPacketQueue::flushAudioPackets(TheoraAudioInterface* audioInterface)
-{
-	
-	for (TheoraAudioPacket* p = popAudioPacket(); p != NULL; p = popAudioPacket())
-	{
-		audioInterface->insertData(p->pcm, p->numSamples);
-		destroyAudioPacket(p);
-	}
-}
-\ No newline at end of file
diff --git a/drivers/theoraplayer/src/TheoraDataSource.cpp b/drivers/theoraplayer/src/TheoraDataSource.cpp
deleted file mode 100644
index 6011dc6783..0000000000
--- a/drivers/theoraplayer/src/TheoraDataSource.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <stdio.h>
-#include <memory.h>
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#include "TheoraVideoManager.h"
-#include "TheoraUtil.h"
-
-TheoraDataSource::~TheoraDataSource()
-{
-
-}
-
-TheoraFileDataSource::TheoraFileDataSource(std::string filename)
-{
-	mFilename = filename;
-	mFilePtr = NULL;
-}
-
-TheoraFileDataSource::~TheoraFileDataSource()
-{
-	if (mFilePtr)
-	{
-		fclose(mFilePtr);
-		mFilePtr = NULL;
-	}
-}
-
-void TheoraFileDataSource::openFile()
-{
-	if (mFilePtr == NULL)
-	{
-		mFilePtr=fopen(mFilename.c_str(), "rb");
-		if (!mFilePtr)
-        {
-            std::string msg = "Can't open video file: " + mFilename;
-            th_writelog(msg);
-            throw TheoraGenericException(msg);
-        }
-		fseek(mFilePtr, 0, SEEK_END);
-		mSize = ftell(mFilePtr);
-		fseek(mFilePtr, 0, SEEK_SET);
-	}
-}
-
-int TheoraFileDataSource::read(void* output, int nBytes)
-{
-	if (mFilePtr == NULL) openFile();
-	size_t n = fread(output, 1, nBytes, mFilePtr);
-	return (int) n;
-}
-
-void TheoraFileDataSource::seek(unsigned long byte_index)
-{
-	if (mFilePtr == NULL) openFile();
-	fseek(mFilePtr, byte_index, SEEK_SET);
-}
-
-unsigned long TheoraFileDataSource::size()
-{
-	if (mFilePtr == NULL) openFile();
-	return mSize;
-}
-
-unsigned long TheoraFileDataSource::tell()
-{
-	if (mFilePtr == NULL) return 0;
-	return ftell(mFilePtr);
-}
-
-TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(std::string filename) :
-	mReadPointer(0),
-	mData(0)
-{
-	mFilename=filename;
-	FILE* f=fopen(filename.c_str(),"rb");
-	if (!f) throw TheoraGenericException("Can't open video file: "+filename);
-	fseek(f,0,SEEK_END);
-	mSize=ftell(f);
-	fseek(f,0,SEEK_SET);
-	mData=new unsigned char[mSize];
-	fread(mData,1,mSize,f);
-	fclose(f);
-}
-
-TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename)
-{
-	mFilename = filename;
-	mData = data;
-	mSize = size;
-	mReadPointer = 0;
-}
-
-TheoraMemoryFileDataSource::~TheoraMemoryFileDataSource()
-{
-	if (mData) delete [] mData;
-}
-
-int TheoraMemoryFileDataSource::read(void* output, int nBytes)
-{
-	int n = (int) ((mReadPointer+nBytes <= mSize) ? nBytes : mSize - mReadPointer);
-	if (!n) return 0;
-	memcpy(output, mData + mReadPointer, n);
-	mReadPointer += n;
-	return n;
-}
-
-void TheoraMemoryFileDataSource::seek(unsigned long byte_index)
-{
-	mReadPointer=byte_index;
-}
-
-unsigned long TheoraMemoryFileDataSource::size()
-{
-	return mSize;
-}
-
-unsigned long TheoraMemoryFileDataSource::tell()
-{
-	return mReadPointer;
-}
diff --git a/drivers/theoraplayer/src/TheoraException.cpp b/drivers/theoraplayer/src/TheoraException.cpp
deleted file mode 100644
index 4588a81397..0000000000
--- a/drivers/theoraplayer/src/TheoraException.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraException.h"
-#include "TheoraUtil.h"
-#include "TheoraVideoManager.h"
-#include <stdio.h>
-
-_TheoraGenericException::_TheoraGenericException(const std::string& errorText, std::string type, std::string file, int line)
-{
-    mErrText = errorText;
-	int src = (int) file.find("src");
-	if (src >= 0) file = file.substr(src + 4, 1000);
-	mLineNumber = line;
-	mFile = file;
-}
-
-
-std::string _TheoraGenericException::repr()
-{
-	std::string text = getType();
-	if (text != "") text += ": ";
-
-	if (mFile != "") text += "[" + mFile + ":" + str(mLineNumber) + "] - ";
-
-	return text + getErrorText();
-}
-
-void _TheoraGenericException::writeOutput()
-{
-	th_writelog("----------------\nException Error!\n\n" + repr() + "\n----------------");
-}
diff --git a/drivers/theoraplayer/src/TheoraFrameQueue.cpp b/drivers/theoraplayer/src/TheoraFrameQueue.cpp
deleted file mode 100644
index f402144795..0000000000
--- a/drivers/theoraplayer/src/TheoraFrameQueue.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraFrameQueue.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-#include "TheoraUtil.h"
-
-
-TheoraFrameQueue::TheoraFrameQueue(TheoraVideoClip* parent)
-{
-	mParent = parent;
-}
-
-TheoraFrameQueue::~TheoraFrameQueue()
-{
-	foreach_l(TheoraVideoFrame*, mQueue)
-    {
-		delete (*it);
-    }
-	mQueue.clear();
-}
-
-TheoraVideoFrame* TheoraFrameQueue::createFrameInstance(TheoraVideoClip* clip)
-{
-	TheoraVideoFrame* frame = new TheoraVideoFrame(clip);
-	if (frame->getBuffer() == NULL) // This can happen if you run out of memory
-	{
-		delete frame;
-		return NULL;
-	}
-	return frame;
-}
-
-void TheoraFrameQueue::setSize(int n)
-{
-	mMutex.lock();
-	if (mQueue.size() > 0)
-	{
-		foreach_l (TheoraVideoFrame*, mQueue)
-        {
-			delete (*it);
-        }
-		mQueue.clear();
-	}
-	TheoraVideoFrame* frame;
-	for (int i = 0;i < n; ++i)
-	{
-		frame = createFrameInstance(mParent);
-		if (frame != NULL) mQueue.push_back(frame);
-		else
-		{
-			TheoraVideoManager::getSingleton().logMessage("TheoraFrameQueue: unable to create " + str(n) + " frames, out of memory. Created " + str((int) mQueue.size()) + " frames.");
-			break;
-		}
-	}
-	mMutex.unlock();
-}
-
-int TheoraFrameQueue::getSize()
-{
-	return (int) mQueue.size();
-}
-
-TheoraVideoFrame* TheoraFrameQueue::_getFirstAvailableFrame()
-{
-	TheoraVideoFrame* frame = mQueue.front();
-	if (frame->mReady) return frame;
-	else               return NULL;
-}
-
-TheoraVideoFrame* TheoraFrameQueue::getFirstAvailableFrame()
-{
-	mMutex.lock();
-	TheoraVideoFrame* frame = _getFirstAvailableFrame();
-	mMutex.unlock();
-	return frame;
-}
-
-void TheoraFrameQueue::clear()
-{
-	mMutex.lock();
-	foreach_l (TheoraVideoFrame*, mQueue)
-		(*it)->clear();
-	mMutex.unlock();
-}
-
-void TheoraFrameQueue::_pop(int n)
-{
-    for (int i = 0; i < n; ++i)
-    {
-        TheoraVideoFrame* first = mQueue.front();
-        first->clear();
-        mQueue.pop_front();
-        mQueue.push_back(first);
-    }
-}
-
-void TheoraFrameQueue::pop(int n)
-{
-	mMutex.lock();
-    _pop(n);
-	mMutex.unlock();
-}
-
-TheoraVideoFrame* TheoraFrameQueue::requestEmptyFrame()
-{
-	TheoraVideoFrame* frame = NULL;
-	mMutex.lock();
-	foreach_l (TheoraVideoFrame*, mQueue)
-	{
-		if (!(*it)->mInUse)
-		{
-			(*it)->mInUse = 1;
-			(*it)->mReady = 0;
-			frame = (*it);
-			break;
-		}
-	}
-	mMutex.unlock();
-	return frame;
-}
-
-int TheoraFrameQueue::getUsedCount()
-{
-	mMutex.lock();
-	int n=0;
-	foreach_l(TheoraVideoFrame*,mQueue)
-		if ((*it)->mInUse) ++n;
-	mMutex.unlock();
-	return n;
-}
-
-int TheoraFrameQueue::_getReadyCount()
-{
-	int n = 0;
-	foreach_l (TheoraVideoFrame*, mQueue)
-    if ((*it)->mReady) ++n;
-	return n;
-}
-
-
-int TheoraFrameQueue::getReadyCount()
-{
-	mMutex.lock();
-	int n = _getReadyCount();
-	mMutex.unlock();
-	return n;
-}
-
-bool TheoraFrameQueue::isFull()
-{
-	return getReadyCount() == mQueue.size();
-}
-
-void TheoraFrameQueue::lock()
-{
-	mMutex.lock();
-}
-
-void TheoraFrameQueue::unlock()
-{
-	mMutex.unlock();
-}
-
-std::list<TheoraVideoFrame*>& TheoraFrameQueue::_getFrameQueue()
-{
-    return mQueue;
-}
diff --git a/drivers/theoraplayer/src/TheoraTimer.cpp b/drivers/theoraplayer/src/TheoraTimer.cpp
deleted file mode 100644
index 644d1c2ab7..0000000000
--- a/drivers/theoraplayer/src/TheoraTimer.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraTimer.h"
-
-TheoraTimer::TheoraTimer()
-{
-	mTime = 0;
-	mPaused = 0;
-    mSpeed = 1.0f;
-}
-
-TheoraTimer::~TheoraTimer()
-{
-
-}
-
-void TheoraTimer::update(float timeDelta)
-{
-	if (!isPaused())
-	{
-		mTime += timeDelta * mSpeed;
-	}
-}
-
-float TheoraTimer::getTime()
-{
-	return mTime;
-}
-
-void TheoraTimer::pause()
-{
-	mPaused = true;
-}
-
-void TheoraTimer::play()
-{
-	mPaused = false;
-}
-
-
-bool TheoraTimer::isPaused()
-{
-	return mPaused;
-}
-
-void TheoraTimer::stop()
-{
-
-}
-
-void TheoraTimer::seek(float time)
-{
-	mTime = time;
-}
-
-void TheoraTimer::setSpeed(float speed)
-{
-    mSpeed = speed;
-}
-
-float TheoraTimer::getSpeed()
-{
-    return mSpeed;
-}
diff --git a/drivers/theoraplayer/src/TheoraUtil.cpp b/drivers/theoraplayer/src/TheoraUtil.cpp
deleted file mode 100644
index 8f1ad0c9c1..0000000000
--- a/drivers/theoraplayer/src/TheoraUtil.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <stdio.h>
-#include <algorithm>
-#include <math.h>
-#include <map>
-#ifndef _WIN32
-#include <unistd.h>
-#include <pthread.h>
-#endif
-
-#include "TheoraUtil.h"
-#include "TheoraException.h"
-
-#ifdef _WIN32
-#include <windows.h>
-#pragma warning( disable: 4996 ) // MSVC++
-#endif
-
-std::string str(int i)
-{
-    char s[32];
-    sprintf(s, "%d", i);
-    return std::string(s);
-}
-
-std::string strf(float i)
-{
-    char s[32];
-    sprintf(s, "%.3f", i);
-    return std::string(s);
-}
-
-void _psleep(int miliseconds)
-{
-#ifdef _WIN32
-#ifndef _WINRT
-	Sleep(miliseconds);
-#else
-	WaitForSingleObjectEx(GetCurrentThread(), miliseconds, 0);
-#endif
-#else
-	usleep(miliseconds * 1000);
-#endif
-}
-
-
-int _nextPow2(int x)
-{
-	int y;
-	for (y = 1; y < x; y *= 2);
-	return y;
-}
diff --git a/drivers/theoraplayer/src/TheoraVideoClip.cpp b/drivers/theoraplayer/src/TheoraVideoClip.cpp
deleted file mode 100644
index 16897ee80e..0000000000
--- a/drivers/theoraplayer/src/TheoraVideoClip.cpp
+++ /dev/null
@@ -1,496 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraVideoClip.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraAudioInterface.h"
-#include "TheoraTimer.h"
-#include "TheoraDataSource.h"
-#include "TheoraUtil.h"
-#include "TheoraException.h"
-
-#include "core/os/memory.h"
-
-TheoraVideoClip::TheoraVideoClip(TheoraDataSource* data_source,
-								 TheoraOutputMode output_mode,
-								 int nPrecachedFrames,
-								 bool usePower2Stride):
-	mAudioInterface(NULL),
-	mNumDroppedFrames(0),
-	mNumDisplayedFrames(0),
-	mSeekFrame(-1),
-	mDuration(-1),
-	mNumFrames(-1),
-	mFPS(1),
-	mUseAlpha(0),
-	mFrameDuration(0),
-	mName(data_source->repr()),
-	mStride(usePower2Stride),
-	mSubFrameWidth(0),
-	mSubFrameHeight(0),
-	mSubFrameOffsetX(0),
-	mSubFrameOffsetY(0),
-	mAudioGain(1),
-	mRequestedOutputMode(output_mode),
-	mAutoRestart(0),
-	mEndOfFile(0),
-	mRestarted(0),
-	mIteration(0),
-    mPlaybackIteration(0),
-	mStream(0),
-	mThreadAccessCount(0),
-	mPriority(1),
-	mFirstFrameDisplayed(0),
-	mWaitingForCache(false),
-	mOutputMode(TH_UNDEFINED)
-{
-
-	audio_track=0;
-	mAudioMutex = NULL;
-	mThreadAccessMutex = new TheoraMutex();
-	mTimer = mDefaultTimer = new TheoraTimer();
-
-	mFrameQueue = NULL;
-	mAssignedWorkerThread = NULL;
-	mNumPrecachedFrames = nPrecachedFrames;
-	setOutputMode(output_mode);
-}
-
-TheoraVideoClip::~TheoraVideoClip()
-{
-	// wait untill a worker thread is done decoding the frame
-	mThreadAccessMutex->lock();
-
-	delete mDefaultTimer;
-
-	if (mStream) memdelete(mStream);
-
-	if (mFrameQueue) delete mFrameQueue;
-
-	if (mAudioInterface)
-	{
-		mAudioMutex->lock(); // ensure a thread isn't using this mutex
-		delete mAudioInterface; // notify audio interface it's time to call it a day
-		mAudioMutex ->unlock();
-		delete mAudioMutex;
-	}
-	
-	mThreadAccessMutex->unlock();
-
-	delete mThreadAccessMutex;
-}
-
-TheoraTimer* TheoraVideoClip::getTimer()
-{
-	return mTimer;
-}
-
-void TheoraVideoClip::setTimer(TheoraTimer* timer)
-{
-	if (!timer) mTimer = mDefaultTimer;
-	else mTimer = timer;
-}
-
-void TheoraVideoClip::resetFrameQueue()
-{
-	mFrameQueue->clear();
-    mPlaybackIteration = mIteration = 0;
-}
-
-void TheoraVideoClip::restart()
-{
-	mEndOfFile = true; //temp, to prevent threads to decode while restarting
-	mThreadAccessMutex->lock();
-	_restart();
-	mTimer->seek(0);
-	mFirstFrameDisplayed = false;
-    resetFrameQueue();
-	mEndOfFile = false;
-	mRestarted = false;
-	mSeekFrame = -1;
-	mThreadAccessMutex->unlock();
-}
-
-void TheoraVideoClip::update(float timeDelta)
-{
-	if (mTimer->isPaused())
-	{
-		mTimer->update(0); // update timer in case there is some code that needs to execute each frame
-		return;
-	}
-	float time = mTimer->getTime(), speed = mTimer->getSpeed();
-    if (time + timeDelta * speed >= mDuration)
-    {
-        if (mAutoRestart && mRestarted)
-        {
-            float seekTime = time + timeDelta * speed;
-            for (;seekTime >= mDuration;)
-            {
-                seekTime -= mDuration;
-                ++mPlaybackIteration;
-            }
-
-            mTimer->seek(seekTime);
-        }
-        else
-        {
-            if (time != mDuration)
-            {
-                mTimer->update((mDuration - time) / speed);
-            }
-        }
-    }
-    else
-    {
-        mTimer->update(timeDelta);
-    }
-}
-
-float TheoraVideoClip::updateToNextFrame()
-{
-	TheoraVideoFrame* f = mFrameQueue->getFirstAvailableFrame();
-	if (!f) return 0;
-
-	float time = f->mTimeToDisplay - mTimer->getTime();
-	update(time);
-	return time;
-}
-
-TheoraFrameQueue* TheoraVideoClip::getFrameQueue()
-{
-	return mFrameQueue;
-}
-
-void TheoraVideoClip::popFrame()
-{
-	++mNumDisplayedFrames;
-	
-    // after transfering frame data to the texture, free the frame
-	// so it can be used again
-	if (!mFirstFrameDisplayed)
-	{
-		mFrameQueue->lock();
-		mFrameQueue->_pop(1);
-		mFirstFrameDisplayed = true;
-		mFrameQueue->unlock();
-	}
-	else
-	{
-		mFrameQueue->pop();
-	}
-}
-
-int TheoraVideoClip::getWidth()
-{
-	return mUseAlpha ? mWidth / 2 : mWidth;
-}
-
-int TheoraVideoClip::getHeight()
-{
-	return mHeight;
-}
-
-int TheoraVideoClip::getSubFrameWidth()
-{
-	return mUseAlpha ? mWidth / 2 : mSubFrameWidth;
-}
-
-int TheoraVideoClip::getSubFrameHeight()
-{
-	return mUseAlpha ? mHeight : mSubFrameHeight;
-}
-
-int TheoraVideoClip::getSubFrameOffsetX()
-{
-	return mUseAlpha ? 0 : mSubFrameOffsetX;
-}
-
-int TheoraVideoClip::getSubFrameOffsetY()
-{
-	return mUseAlpha ? 0 : mSubFrameOffsetY;
-}
-
-float TheoraVideoClip::getAbsPlaybackTime()
-{
-    return mTimer->getTime() + mPlaybackIteration * mDuration;
-}
-
-int TheoraVideoClip::discardOutdatedFrames(float absTime)
-{
-    int nReady = mFrameQueue->_getReadyCount();
-    // only drop frames if you have more frames to show. otherwise even the late frame will do..
-    if (nReady == 1) return 0;
-    float time = absTime;
-
-    int nPop = 0;
-    TheoraVideoFrame* frame;
-    float timeToDisplay;
-    
-    std::list<TheoraVideoFrame*>& queue = mFrameQueue->_getFrameQueue();
-    foreach_l (TheoraVideoFrame*, queue)
-    {
-        frame = *it;
-        if (!frame->mReady) break;
-        timeToDisplay = frame->mTimeToDisplay + frame->mIteration * mDuration;
-        if (time > timeToDisplay + mFrameDuration)
-        {
-            ++nPop;
-            if (nReady - nPop == 1) break; // always leave at least one in the queue
-        }
-        else break;
-    }
-    
-	if (nPop > 0)
-    {
-#define _DEBUG
-#ifdef _DEBUG
-        std::string log = getName() + ": dropped frame ";
-    
-        int i = nPop;
-        foreach_l (TheoraVideoFrame*, queue)
-        {
-            log += str((int) (*it)->getFrameNumber());
-            if (i-- > 1)
-            {
-                log += ", ";
-            }
-            else break;
-        }
-        th_writelog(log);
-#endif
-        mNumDroppedFrames += nPop;
-        mFrameQueue->_pop(nPop);
-	}
-    
-    return nPop;
-}
-
-TheoraVideoFrame* TheoraVideoClip::getNextFrame()
-{
-	TheoraVideoFrame* frame;
-    // if we are about to seek, then the current frame queue is invalidated
-	// (will be cleared when a worker thread does the actual seek)
-    if (mSeekFrame != -1) return NULL;
-
-    mFrameQueue->lock();
-	float time = getAbsPlaybackTime();
-    discardOutdatedFrames(time);
-    
-    frame = mFrameQueue->_getFirstAvailableFrame();
-    if (frame != NULL)
-    {
-        if (frame->mTimeToDisplay + frame->mIteration * mDuration > time && mFirstFrameDisplayed)
-        {
-            frame = NULL; // frame is ready but it's not yet time to display it, except when we haven't displayed any frames yet
-        }
-    }
-
-    mFrameQueue->unlock();
-	return frame;
-}
-
-std::string TheoraVideoClip::getName()
-{
-	return mName;
-}
-
-bool TheoraVideoClip::isBusy()
-{
-	return mAssignedWorkerThread || mOutputMode != mRequestedOutputMode;
-}
-
-TheoraOutputMode TheoraVideoClip::getOutputMode()
-{
-	return mOutputMode;
-}
-
-void TheoraVideoClip::setOutputMode(TheoraOutputMode mode)
-{
-	if (mode == TH_UNDEFINED) throw TheoraGenericException("Invalid output mode: TH_UNDEFINED for video: " + mName);
-	if (mOutputMode == mode) return;
-	mRequestedOutputMode = mode;
-	mUseAlpha = (mode == TH_RGBA   ||
-				 mode == TH_ARGB   ||
-				 mode == TH_BGRA   ||
-				 mode == TH_ABGR   ||
-				 mode == TH_GREY3A ||
-				 mode == TH_AGREY3 ||
-				 mode == TH_YUVA   ||
-				 mode == TH_AYUV);
-	if (mAssignedWorkerThread)
-	{
-		mThreadAccessMutex->lock();
-		// discard current frames and recreate them
-		mFrameQueue->setSize(mFrameQueue->getSize());
-		mThreadAccessMutex->unlock();
-
-	}
-	mOutputMode = mRequestedOutputMode;
-}
-
-float TheoraVideoClip::getTimePosition()
-{
-	return mTimer->getTime();
-}
-
-int TheoraVideoClip::getNumPrecachedFrames()
-{
-	return mFrameQueue->getSize();
-}
-
-void TheoraVideoClip::setNumPrecachedFrames(int n)
-{
-	if (mFrameQueue->getSize() != n)
-		mFrameQueue->setSize(n);
-}
-
-int TheoraVideoClip::_getNumReadyFrames()
-{
-	if (mSeekFrame != -1) return 0;
-	return mFrameQueue->_getReadyCount();
-}
-
-int TheoraVideoClip::getNumReadyFrames()
-{
-	if (mSeekFrame != -1) return 0; // we are about to seek, consider frame queue empty even though it will be emptied upon seek
-	return mFrameQueue->getReadyCount();
-}
-
-float TheoraVideoClip::getDuration()
-{
-	return mDuration;
-}
-
-float TheoraVideoClip::getFPS()
-{
-	return mFPS;
-}
-
-void TheoraVideoClip::play()
-{
-	mTimer->play();
-}
-
-void TheoraVideoClip::pause()
-{
-	mTimer->pause();
-}
-
-bool TheoraVideoClip::isPaused()
-{
-	return mTimer->isPaused();
-}
-
-bool TheoraVideoClip::isDone()
-{
-	return mEndOfFile && !mFrameQueue->getFirstAvailableFrame();
-}
-
-void TheoraVideoClip::stop()
-{
-	pause();
-    resetFrameQueue();
-	mFirstFrameDisplayed = false;
-	seek(0);
-}
-
-void TheoraVideoClip::setPlaybackSpeed(float speed)
-{
-	mTimer->setSpeed(speed);
-}
-
-float TheoraVideoClip::getPlaybackSpeed()
-{
-	return mTimer->getSpeed();
-}
-
-void TheoraVideoClip::seek(float time)
-{
-	seekToFrame((int) (time * getFPS()));
-}
-
-void TheoraVideoClip::seekToFrame(int frame)
-{
-	if      (frame < 0)          mSeekFrame = 0;
-	else if (frame > mNumFrames) mSeekFrame = mNumFrames;
-	else                         mSeekFrame = frame;
-
-	mFirstFrameDisplayed = false;
-	mEndOfFile = false;
-}
-
-void TheoraVideoClip::waitForCache(float desired_cache_factor, float max_wait_time)
-{
-	mWaitingForCache = true;
-	bool paused = mTimer->isPaused();
-	if (!paused) mTimer->pause();
-	int elapsed = 0;
-	int desired_num_precached_frames = (int) (desired_cache_factor * getNumPrecachedFrames());
-	while (getNumReadyFrames() < desired_num_precached_frames)
-	{
-		_psleep(10);
-		elapsed += 10;
-		if (elapsed >= max_wait_time * 1000) break;
-	}
-	if (!paused) mTimer->play();
-	mWaitingForCache = false;
-}
-
-float TheoraVideoClip::getPriority()
-{
-	return mPriority;
-}
-
-void TheoraVideoClip::setPriority(float priority)
-{
-	mPriority = priority;
-}
-
-float TheoraVideoClip::getPriorityIndex()
-{
-	float priority = (float) getNumReadyFrames();
-	if (mTimer->isPaused()) priority += getNumPrecachedFrames() / 2;
-	
-	return priority;
-}
-
-void TheoraVideoClip::setAudioInterface(TheoraAudioInterface* iface)
-{
-	mAudioInterface = iface;
-	if (iface && !mAudioMutex) mAudioMutex = new TheoraMutex;
-	if (!iface && mAudioMutex)
-	{
-		delete mAudioMutex;
-		mAudioMutex = NULL;
-	}
-}
-
-TheoraAudioInterface* TheoraVideoClip::getAudioInterface()
-{
-	return mAudioInterface;
-}
-
-void TheoraVideoClip::setAudioGain(float gain)
-{
-	if (gain > 1) mAudioGain=1;
-	if (gain < 0) mAudioGain=0;
-	else          mAudioGain=gain;
-}
-
-float TheoraVideoClip::getAudioGain()
-{
-	return mAudioGain;
-}
-
-void TheoraVideoClip::setAutoRestart(bool value)
-{
-	mAutoRestart = value;
-	if (value) mEndOfFile = false;
-}
diff --git a/drivers/theoraplayer/src/TheoraVideoFrame.cpp b/drivers/theoraplayer/src/TheoraVideoFrame.cpp
deleted file mode 100644
index b70253dabf..0000000000
--- a/drivers/theoraplayer/src/TheoraVideoFrame.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include <memory.h>
-#include "TheoraPixelTransform.h"
-#include "TheoraVideoClip.h"
-#include "TheoraVideoFrame.h"
-#include "TheoraVideoManager.h"
-
-//#define YUV_TEST // uncomment this if you want to benchmark YUV decoding functions
-
-extern "C"
-{
-void decodeRGB  (struct TheoraPixelTransform* t);
-void decodeRGBA (struct TheoraPixelTransform* t);
-void decodeRGBX (struct TheoraPixelTransform* t);
-void decodeARGB (struct TheoraPixelTransform* t);
-void decodeXRGB (struct TheoraPixelTransform* t);
-void decodeBGR  (struct TheoraPixelTransform* t);
-void decodeBGRA (struct TheoraPixelTransform* t);
-void decodeBGRX (struct TheoraPixelTransform* t);
-void decodeABGR (struct TheoraPixelTransform* t);
-void decodeXBGR (struct TheoraPixelTransform* t);
-void decodeGrey (struct TheoraPixelTransform* t);
-void decodeGrey3(struct TheoraPixelTransform* t);
-void decodeGreyA(struct TheoraPixelTransform* t);
-void decodeGreyX(struct TheoraPixelTransform* t);
-void decodeAGrey(struct TheoraPixelTransform* t);
-void decodeXGrey(struct TheoraPixelTransform* t);
-void decodeYUV  (struct TheoraPixelTransform* t);
-void decodeYUVA (struct TheoraPixelTransform* t);
-void decodeYUVX (struct TheoraPixelTransform* t);
-void decodeAYUV (struct TheoraPixelTransform* t);
-void decodeXYUV (struct TheoraPixelTransform* t);
-}
-
-static void (*conversion_functions[])(struct TheoraPixelTransform*) = {0,
-	decodeRGB,
-	decodeRGBA,
-	decodeRGBX,
-	decodeARGB,
-	decodeXRGB,
-	decodeBGR,
-	decodeBGRA,
-	decodeBGRX,
-	decodeABGR,
-	decodeXBGR,
-	decodeGrey,
-	decodeGrey3,
-	decodeGreyA,
-	decodeGreyX,
-	decodeAGrey,
-	decodeXGrey,
-	decodeYUV,
-	decodeYUVA,
-	decodeYUVX,
-	decodeAYUV,
-	decodeXYUV
-};
-
-TheoraVideoFrame::TheoraVideoFrame(TheoraVideoClip* parent)
-{
-	mReady = mInUse = false;
-	mParent = parent;
-	mIteration = 0;
-	// number of bytes based on output mode
-	int bytemap[]={0, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 1, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4};
-	mBpp = bytemap[mParent->getOutputMode()];
-	unsigned int size = mParent->getStride() * mParent->mHeight * mBpp;
-	try
-	{
-		mBuffer = new unsigned char[size];
-	}
-	catch (std::bad_alloc)
-	{
-		mBuffer = NULL;
-		return;
-	}
-	memset(mBuffer, 255, size);
-}
-
-TheoraVideoFrame::~TheoraVideoFrame()
-{
-	if (mBuffer) delete [] mBuffer;
-}
-
-int TheoraVideoFrame::getWidth()
-{
-	return mParent->getWidth();
-}
-
-int TheoraVideoFrame::getStride()
-{
-	return mParent->mStride;
-}
-
-int TheoraVideoFrame::getHeight()
-{
-	return mParent->getHeight();
-}
-
-unsigned char* TheoraVideoFrame::getBuffer()
-{
-	return mBuffer;
-}
-
-void TheoraVideoFrame::decode(struct TheoraPixelTransform* t)
-{
-	if (t->raw != NULL)
-	{
-		int bufferStride = mParent->getWidth() * mBpp;
-		if (bufferStride == t->rawStride)
-		{
-			memcpy(mBuffer, t->raw, t->rawStride * mParent->getHeight());
-		}
-		else
-		{
-			unsigned char *buff = mBuffer, *src = t->raw;
-			int i, h = mParent->getHeight();
-			for (i = 0; i < h; ++i, buff += bufferStride, src += t->rawStride)
-			{
-				memcpy(buff, src, bufferStride);
-			}
-		}
-	}
-	else
-	{
-		t->out = mBuffer;
-		t->w = mParent->getWidth();
-		t->h = mParent->getHeight();
-        
-#ifdef YUV_TEST // when benchmarking yuv conversion functions during development, do a timed average
-        #define N 1000
-        clock_t time = clock();
-        for (int i = 0; i < N; ++i)
-        {
-            conversion_functions[mParent->getOutputMode()](t);
-        }
-        float diff = (clock() - time) * 1000.0f / CLOCKS_PER_SEC;
-        
-		char s[128];
-		sprintf(s, "%.2f", diff / N);
-        TheoraVideoManager::getSingleton().logMessage("YUV Decoding time: " + std::string(s) + " ms\n");
-#else
-		conversion_functions[mParent->getOutputMode()](t);
-#endif
-	}
-	mReady = true;
-}
-
-void TheoraVideoFrame::clear()
-{
-	mInUse = mReady = false;
-}
diff --git a/drivers/theoraplayer/src/TheoraVideoManager.cpp b/drivers/theoraplayer/src/TheoraVideoManager.cpp
deleted file mode 100644
index 53b211374a..0000000000
--- a/drivers/theoraplayer/src/TheoraVideoManager.cpp
+++ /dev/null
@@ -1,485 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "TheoraVideoManager.h"
-#include "TheoraWorkerThread.h"
-#include "TheoraVideoClip.h"
-#include "TheoraFrameQueue.h"
-#include "TheoraAudioInterface.h"
-#include "TheoraUtil.h"
-#include "TheoraDataSource.h"
-#include "TheoraException.h"
-#ifdef __THEORA
-	#include <theora/codec.h>
-	#include <vorbis/codec.h>
-	#include "TheoraVideoClip_Theora.h"
-#endif
-#ifdef __AVFOUNDATION
-	#include "TheoraVideoClip_AVFoundation.h"
-#endif
-#ifdef __FFMPEG
-	#include "TheoraVideoClip_FFmpeg.h"
-#endif
-#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection
-	#include "cpu-features.h"
-#endif
-// declaring function prototype here so I don't have to put it in a header file
-// it only needs to be used by this plugin and called once
-extern "C"
-{
-	void initYUVConversionModule();
-}
-
-#include "core/os/memory.h"
-
-//#define _DECODING_BENCHMARK //uncomment to test average decoding time on a given device
-
-
-// --------------------------
-//#define _SCHEDULING_DEBUG
-#ifdef _SCHEDULING_DEBUG
-float gThreadDiagnosticTimer = 0;
-#endif
-// --------------------------
-
-#ifdef _DECODING_BENCHMARK
-void benchmark(TheoraVideoClip* clip)
-{
-	int nPrecached = 256;
-	int n = nPrecached;
-	char msg[1024];
-	clock_t t = clock();
-	while (n > 0)
-	{
-		clip->waitForCache(1.0f, 1000000);
-		n -= 32;
-		clip->getFrameQueue()->clear();
-	}
-	float diff = ((float) (clock() - t) * 1000.0f) / CLOCKS_PER_SEC;
-	sprintf(msg, "BENCHMARK: %s: Decoding %d frames took %.1fms (%.2fms average per frame)\n",clip->getName().c_str(), nPrecached, diff, diff / nPrecached);
-	TheoraVideoManager::getSingleton().logMessage(msg);
-	clip->seek(0);
-}
-#endif
-
-struct TheoraWorkCandidate
-{
-	TheoraVideoClip* clip;
-	float priority, queuedTime, workTime, entitledTime;
-};
-
-TheoraVideoManager* g_ManagerSingleton = NULL;
-
-void theora_writelog(std::string output)
-{
-	printf("%s\n", output.c_str());
-}
-
-void (*g_LogFuction)(std::string) = theora_writelog;
-
-void TheoraVideoManager::setLogFunction(void (*fn)(std::string))
-{
-	g_LogFuction = fn;
-}
-
-TheoraVideoManager* TheoraVideoManager::getSingletonPtr()
-{
-    return g_ManagerSingleton;
-}
-
-TheoraVideoManager& TheoraVideoManager::getSingleton()
-{  
-    return *g_ManagerSingleton;  
-}
-
-TheoraVideoManager::TheoraVideoManager(int num_worker_threads) : 
-	mDefaultNumPrecachedFrames(8)
-{
-	if (num_worker_threads < 1) throw TheoraGenericException("Unable to create TheoraVideoManager, at least one worker thread is reqired");
-
-	g_ManagerSingleton = this;
-
-	std::string msg = "Initializing Theora Playback Library (" + getVersionString() + ")\n";
-#ifdef __THEORA
-	msg += "  - libtheora version: " + std::string(th_version_string()) + "\n" +
-	       "  - libvorbis version: " +  std::string(vorbis_version_string()) + "\n";
-#endif
-#ifdef _ANDROID
-	uint64_t features = android_getCpuFeaturesExt();
-	char s[128];
-	sprintf(s, "  - Android: CPU Features: %u\n", (unsigned int) features);
-	msg += s;
-	if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0)
-		msg += "  - Android: NEON features NOT SUPPORTED by CPU\n";
-	else
-		msg += "  - Android: Detected NEON CPU features\n";
-#endif
-
-#ifdef __AVFOUNDATION
-	msg += "  - using Apple AVFoundation classes.\n";
-#endif
-#ifdef __FFMPEG
-	msg += "  - using FFmpeg library.\n";
-#endif
-	
-	logMessage(msg + "------------------------------------");
-	mAudioFactory = NULL;
-	mWorkMutex = new TheoraMutex();
-
-	// for CPU based yuv2rgb decoding
-	initYUVConversionModule();
-
-	createWorkerThreads(num_worker_threads);
-}
-
-TheoraVideoManager::~TheoraVideoManager()
-{
-	destroyWorkerThreads();
-
-	mWorkMutex->lock();
-	ClipList::iterator ci;
-	for (ci = mClips.begin(); ci != mClips.end(); ++ci)
-		delete (*ci);
-	mClips.clear();
-	mWorkMutex->unlock();
-	delete mWorkMutex;
-}
-
-void TheoraVideoManager::logMessage(std::string msg)
-{
-	g_LogFuction(msg);
-}
-
-TheoraVideoClip* TheoraVideoManager::getVideoClipByName(std::string name)
-{
-	TheoraVideoClip* clip = NULL;
-	mWorkMutex->lock();
-
-	foreach(TheoraVideoClip*, mClips)
-	{
-		if ((*it)->getName() == name)
-		{
-			clip = *it;
-			break;
-		}
-	}
-	mWorkMutex->unlock();
-
-	return clip;
-}
-
-void TheoraVideoManager::setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory)
-{
-	mAudioFactory = factory;
-}
-
-TheoraAudioInterfaceFactory* TheoraVideoManager::getAudioInterfaceFactory()
-{
-	return mAudioFactory;
-}
-
-TheoraVideoClip* TheoraVideoManager::createVideoClip(std::string filename,
-													 TheoraOutputMode output_mode,
-													 int numPrecachedOverride,
-													 bool usePower2Stride,
-													 int p_track)
-{
-	TheoraDataSource* src=memnew(TheoraFileDataSource(filename));
-	return createVideoClip(src,output_mode,numPrecachedOverride,usePower2Stride, p_track);
-}
-
-TheoraVideoClip* TheoraVideoManager::createVideoClip(TheoraDataSource* data_source,
-													 TheoraOutputMode output_mode,
-													 int numPrecachedOverride,
-													 bool usePower2Stride,
-													 int p_audio_track)
-{
-	mWorkMutex->lock();
-
-	TheoraVideoClip* clip = NULL;
-	int nPrecached = numPrecachedOverride ? numPrecachedOverride : mDefaultNumPrecachedFrames;
-	logMessage("Creating video from data source: " + data_source->repr() + " [" + str(nPrecached) + " precached frames].");
-	
-#ifdef __AVFOUNDATION
-	TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(data_source);
-	std::string filename;
-	if (fileDataSource == NULL)
-	{
-		TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(data_source);
-		if (memoryDataSource != NULL) filename = memoryDataSource->getFilename();
-		// if the user has his own data source, it's going to be a problem for AVAssetReader since it only supports reading from files...
-	}
-	else filename = fileDataSource->getFilename();
-
-	if (filename.size() > 4 && filename.substr(filename.size() - 4, filename.size()) == ".mp4")
-	{
-		clip = new TheoraVideoClip_AVFoundation(data_source, output_mode, nPrecached, usePower2Stride);
-	}
-#endif
-#if defined(__AVFOUNDATION) && defined(__THEORA)
-	else
-#endif
-#ifdef __THEORA
-		clip = new TheoraVideoClip_Theora(data_source, output_mode, nPrecached, usePower2Stride);
-#endif
-#ifdef __FFMPEG
-		clip = new TheoraVideoClip_FFmpeg(data_source, output_mode, nPrecached, usePower2Stride);
-#endif
-
-	clip->set_audio_track(p_audio_track);
-	clip->load(data_source);
-	clip->decodeNextFrame(); // ensure the first frame is always preloaded and have the main thread do it to prevent potential thread starvatio
-
-	mClips.push_back(clip);
-	mWorkMutex->unlock();
-	
-#ifdef _DECODING_BENCHMARK
-	benchmark(clip);
-#endif
-	return clip;
-}
-
-void TheoraVideoManager::destroyVideoClip(TheoraVideoClip* clip)
-{
-	if (clip)
-	{
-		th_writelog("Destroying video clip: " + clip->getName());
-		mWorkMutex->lock();
-		bool reported = 0;
-		while (clip->mAssignedWorkerThread)
-		{
-			if (!reported)
-			{
-				th_writelog(" - Waiting for WorkerThread to finish decoding in order to destroy");
-				reported = 1;
-			}
-			_psleep(1);
-		}
-		if (reported) th_writelog(" - WorkerThread done, destroying...");
-		
-		// erase the clip from the clip list
-		foreach (TheoraVideoClip*, mClips)
-		{
-			if ((*it) == clip)
-			{
-				mClips.erase(it);
-				break;
-			}
-		}
-		// remove all it's references from the work log
-		mWorkLog.remove(clip);
-
-		// delete the actual clip
-		delete clip;
-#ifdef _DEBUG
-		th_writelog("Destroyed video.");
-#endif
-		mWorkMutex->unlock();
-	}
-}
-
-TheoraVideoClip* TheoraVideoManager::requestWork(TheoraWorkerThread* caller)
-{
-	if (!mWorkMutex) return NULL;
-	mWorkMutex->lock();
-
-	TheoraVideoClip* selectedClip = NULL;
-	float maxQueuedTime = 0, totalAccessCount = 0, prioritySum = 0, diff, maxDiff = -1;
-	int nReadyFrames;
-	std::vector<TheoraWorkCandidate> candidates;
-	TheoraVideoClip* clip;
-	TheoraWorkCandidate candidate;
-
-	// first pass is for playing videos, but if no such videos are available for decoding
-	// paused videos are selected in the second pass.
-    // Note that paused videos that are waiting for cache are considered equal to playing
-    // videos in the scheduling context
-
-	for (int i = 0; i < 2 && candidates.size() == 0; ++i)
-	{
-		foreach (TheoraVideoClip*, mClips)
-		{
-			clip = *it;
-			if (clip->isBusy() || (i == 0 && clip->isPaused() && !clip->mWaitingForCache)) continue;
-			nReadyFrames = clip->getNumReadyFrames();
-			if (nReadyFrames == clip->getFrameQueue()->getSize()) continue;
-
-			candidate.clip = clip;
-			candidate.priority = clip->getPriority();
-			candidate.queuedTime = (float) nReadyFrames / (clip->getFPS() * clip->getPlaybackSpeed());
-			candidate.workTime = (float) clip->mThreadAccessCount;
-			
-			totalAccessCount += candidate.workTime;
-			if (maxQueuedTime < candidate.queuedTime) maxQueuedTime = candidate.queuedTime;
-
-			candidates.push_back(candidate);
-		}
-	}
-
-	// prevent division by zero
-	if (totalAccessCount == 0) totalAccessCount = 1;
-	if (maxQueuedTime == 0) maxQueuedTime = 1;
-
-	// normalize candidate values
-	foreach (TheoraWorkCandidate, candidates)
-	{
-		it->workTime /= totalAccessCount;
-		// adjust user priorities to favor clips that have fewer frames queued
-		it->priority *= 1.0f - (it->queuedTime / maxQueuedTime) * 0.5f;
-		prioritySum += it->priority;
-	}
-	foreach (TheoraWorkCandidate, candidates)
-	{
-		it->entitledTime = it->priority / prioritySum;
-	}
-
-	// now, based on how much access time has been given to each clip in the work log
-	// and how much time should be given to each clip based on calculated priorities,
-	// we choose a best suited clip for this worker thread to decode next
-	foreach (TheoraWorkCandidate, candidates)
-	{
-		diff = it->entitledTime - it->workTime;
-
-		if (maxDiff < diff)
-		{
-			maxDiff = diff;
-			selectedClip = it->clip;
-		}
-	}
-
-	if (selectedClip)
-	{
-		selectedClip->mAssignedWorkerThread = caller;
-		
-		int nClips = (int) mClips.size();
-		unsigned int maxWorkLogSize = (nClips - 1) * 50;
-
-		if (nClips > 1)
-		{
-			mWorkLog.push_front(selectedClip);
-			++selectedClip->mThreadAccessCount;
-		}
-		
-		TheoraVideoClip* c;
-		while (mWorkLog.size() > maxWorkLogSize)
-		{
-			c = mWorkLog.back();
-			mWorkLog.pop_back();
-			c->mThreadAccessCount--;
-		}
-#ifdef _SCHEDULING_DEBUG
-		if (mClips.size() > 1)
-		{
-			int accessCount = mWorkLog.size();
-			if (gThreadDiagnosticTimer > 2.0f)
-			{
-				gThreadDiagnosticTimer = 0;
-				std::string logstr = "-----\nTheora Playback Library debug CPU time analysis (" + str(accessCount) + "):\n";
-				int percent;
-				foreach (TheoraVideoClip*, mClips)
-				{
-					percent = ((float) (*it)->mThreadAccessCount / mWorkLog.size()) * 100.0f;
-					logstr += (*it)->getName() + " (" + str((*it)->getPriority()) + "): " + str((*it)->mThreadAccessCount) + ", " + str(percent) + "%\n";
-				}
-				logstr += "-----";
-				th_writelog(logstr);
-			}
-		}
-#endif
-	}
-
-	mWorkMutex->unlock();
-	return selectedClip;
-}
-
-void TheoraVideoManager::update(float timeDelta)
-{
-	mWorkMutex->lock();
-	foreach (TheoraVideoClip*, mClips)
-	{
-		(*it)->update(timeDelta);
-		(*it)->decodedAudioCheck();
-	}
-	mWorkMutex->unlock();
-#ifdef _SCHEDULING_DEBUG
-	gThreadDiagnosticTimer += timeDelta;
-#endif
-}
-
-int TheoraVideoManager::getNumWorkerThreads()
-{
-	return (int) mWorkerThreads.size();
-}
-
-void TheoraVideoManager::createWorkerThreads(int n)
-{
-	TheoraWorkerThread* t;
-	for (int i=0;i<n;++i)
-	{
-		t=new TheoraWorkerThread();
-		t->start();
-		mWorkerThreads.push_back(t);
-	}
-}
-
-void TheoraVideoManager::destroyWorkerThreads()
-{
-	foreach(TheoraWorkerThread*,mWorkerThreads)
-	{
-		(*it)->join();
-		delete (*it);
-	}
-	mWorkerThreads.clear();
-}
-
-void TheoraVideoManager::setNumWorkerThreads(int n)
-{
-	if (n == getNumWorkerThreads()) return;
-	if (n < 1) throw TheoraGenericException("Unable to change the number of worker threads in TheoraVideoManager, at least one worker thread is reqired");
-
-	th_writelog("changing number of worker threats to: "+str(n));
-
-	destroyWorkerThreads();
-	createWorkerThreads(n);
-}
-
-std::string TheoraVideoManager::getVersionString()
-{
-	int a, b, c;
-	getVersion(&a, &b, &c);
-	std::string out = str(a) + "." + str(b);
-	if (c != 0)
-	{
-		if (c < 0) out += " RC" + str(-c);
-		else       out += "." + str(c);
-	}
-	return out;
-}
-
-void TheoraVideoManager::getVersion(int* a, int* b, int* c) // TODO, return a struct instead of the current solution.
-{
-	*a = 1;
-	*b = 1;
-	*c = 0;
-}
-
-std::vector<std::string> TheoraVideoManager::getSupportedDecoders()
-{
-	std::vector<std::string> lst;
-#ifdef __THEORA
-	lst.push_back("Theora");
-#endif
-#ifdef __AVFOUNDATION
-	lst.push_back("AVFoundation");
-#endif
-#ifdef __FFMPEG
-	lst.push_back("FFmpeg");
-#endif
-	
-	return lst;
-}
diff --git a/drivers/theoraplayer/src/TheoraWorkerThread.cpp b/drivers/theoraplayer/src/TheoraWorkerThread.cpp
deleted file mode 100644
index cef8545b8d..0000000000
--- a/drivers/theoraplayer/src/TheoraWorkerThread.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef _WIN32
-#pragma warning( disable: 4251 ) // MSVC++
-#endif
-#include "TheoraWorkerThread.h"
-#include "TheoraVideoManager.h"
-#include "TheoraVideoClip.h"
-#include "TheoraUtil.h"
-
-TheoraWorkerThread::TheoraWorkerThread() : TheoraThread()
-{
-	mClip = NULL;
-}
-
-TheoraWorkerThread::~TheoraWorkerThread()
-{
-
-}
-
-void TheoraWorkerThread::execute()
-{
-	while (isRunning())
-	{
-		mClip = TheoraVideoManager::getSingleton().requestWork(this);
-		if (!mClip)
-		{
-			_psleep(100);
-			continue;
-		}
-
-		mClip->mThreadAccessMutex->lock();
-		// if user requested seeking, do that then.
-		if (mClip->mSeekFrame >= 0) mClip->doSeek();
-
-		if (!mClip->decodeNextFrame())
-			_psleep(1); // this happens when the video frame queue is full.
-
-		mClip->mAssignedWorkerThread = NULL;
-		mClip->mThreadAccessMutex->unlock();
-		mClip = NULL;
-	}
-}
diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c
deleted file mode 100644
index 8af5dd1f58..0000000000
--- a/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "yuv_util.h"
-
-static void _decodeGrey3(struct TheoraPixelTransform* t, int stride, int nBytes)
-{
-	unsigned char *ySrc = t->y, *yLineEnd, *out = t->out;
-	unsigned int y;
-	for (y = 0; y < t->h; ++y, ySrc += t->yStride - t->w, out += stride-t->w * nBytes)
-		for (yLineEnd = ySrc + t->w; ySrc != yLineEnd; ++ySrc, out += nBytes)
-			out[0] = out[1] = out[2] = *ySrc;
-}
-
-void decodeGrey(struct TheoraPixelTransform* t)
-{
-	unsigned char *ySrc = t->y, *yLineEnd, *out = t->out;
-	unsigned int y;
-	for (y = 0; y < t->h; ++y, ySrc += t->yStride - t->w)
-		for (yLineEnd = ySrc + t->w; ySrc != yLineEnd; ++ySrc, ++out)
-			*out = *ySrc;
-
-}
-
-void decodeGrey3(struct TheoraPixelTransform* t)
-{
-	_decodeGrey3(t, t->w * 3, 3);
-}
-
-void decodeGreyA(struct TheoraPixelTransform* t)
-{
-	_decodeGrey3(t, t->w * 4, 4);
-	_decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeGreyX(struct TheoraPixelTransform* t)
-{
-	_decodeGrey3(t, t->w * 4, 4);
-}
-
-void decodeAGrey(struct TheoraPixelTransform* t)
-{
-	_decodeGrey3(incOut(t, 1), t->w * 4, 4);
-	_decodeAlpha(t, t->w * 4);
-}
-
-void decodeXGrey(struct TheoraPixelTransform* t)
-{
-	_decodeGrey3(incOut(t, 1), t->w * 4, 4);
-}
-
diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c
deleted file mode 100644
index e981e75ead..0000000000
--- a/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef _YUV_C
-#include "yuv_util.h"
-
-int YTable [256];
-int BUTable[256];
-int GUTable[256];
-int GVTable[256];
-int RVTable[256];
-
-#define CLIP_RGB_COLOR(dst, x) \
-	tmp = (x) >> 13;\
-	if ((tmp & ~0xFF) == 0) dst = tmp;\
-	else                    dst = (-tmp) >> 31;
-
-#define _decodeRGB(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3)\
-	register int tmp;\
-	int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;\
-	unsigned int y;\
-	unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\
-	\
-	for (y = 0; y < t->h; y += 2)\
-	{\
-		ySrcEven = t->y + y * t->yStride;\
-		ySrcOdd  = t->y + (y + 1) * t->yStride;\
-		uSrc = t->u + y * t->uStride / 2;\
-		vSrc = t->v + y * t->vStride / 2;\
-		out1 = t->out + y * stride;\
-		out2 = t->out + (y + 1) * stride;\
-		\
-		for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\
-		{\
-			cu = *uSrc; ++uSrc;\
-			cv = *vSrc; ++vSrc;\
-			rV   = RVTable[cv];\
-			gUV  = GUTable[cu] + GVTable[cv];\
-			bU   = BUTable[cu];\
-			\
-			rgbY1 = YTable[*ySrcEven]; ++ySrcEven;\
-			rgbY2 = YTable[*ySrcOdd];  ++ySrcOdd;\
-			rgbY3 = YTable[*ySrcEven]; ++ySrcEven;\
-			rgbY4 = YTable[*ySrcOdd];  ++ySrcOdd;\
-			\
-			CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\
-			CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\
-			CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\
-			\
-			CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\
-			CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\
-			CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\
-			\
-			CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\
-			CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\
-			CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\
-			\
-			CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\
-			CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\
-			CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\
-			\
-			out1 += nBytes2;  out2 += nBytes2;\
-		}\
-	}
-
-// The 'trick' with this function is that it skips decoding YUV pixels if the alpha value is 0, thus improving the decoding speed of a frame
-#define _decodeRGBA(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3, aindex1, aindex2)\
-\
-	register int tmp;\
-	int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;\
-	int alphaStride = t->w;\
-	unsigned int y;\
-	unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\
-	\
-	for (y = 0; y < t->h; y += 2)\
-	{\
-		ySrcEven = t->y + y * t->yStride;\
-		ySrcOdd  = t->y + (y + 1) * t->yStride;\
-		uSrc = t->u + y * t->uStride / 2;\
-		vSrc = t->v + y * t->vStride / 2;\
-		out1 = t->out + y * stride;\
-		out2 = t->out + (y + 1) * stride;\
-		\
-		for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\
-		{\
-			cu = *uSrc; ++uSrc;\
-			cv = *vSrc; ++vSrc;\
-			rV   = RVTable[cv];\
-			gUV  = GUTable[cu] + GVTable[cv];\
-			bU   = BUTable[cu];\
-			\
-			rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven;\
-			rgbY2 = YTable[*ySrcOdd];  a2 = ySrcOdd [alphaStride];  ++ySrcOdd;\
-			rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven;\
-			rgbY4 = YTable[*ySrcOdd];  a4 = ySrcOdd [alphaStride];  ++ySrcOdd;\
-			\
-			if (a1 > 16)\
-			{\
-				CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\
-				CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\
-				CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\
-				out1[aindex1] = a1 >= 235 ? 255 : (unsigned char) (((a1 - 16) * 255) / 219);\
-			}\
-			else *((unsigned int*) out1) = 0;\
-			\
-			if (a2 > 16)\
-			{\
-				CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\
-				CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\
-				CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\
-				out2[aindex1] = a2 >= 235 ? 255 : (unsigned char) (((a2 - 16) * 255) / 219);\
-			}\
-			else *((unsigned int*) out2) = 0;\
-			\
-			if (a3 > 16)\
-			{\
-				CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\
-				CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\
-				CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\
-				out1[aindex2] = a3 >= 235 ? 255 : (unsigned char) (((a3 - 16) * 255) / 219);\
-			}\
-			else *((unsigned int*) &out1[4]) = 0;\
-			\
-			if (a4 > 16)\
-			{\
-				CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\
-				CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\
-				CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\
-				out2[aindex2] = a4 >= 235 ? 255 : (unsigned char) (((a4 - 16) * 255) / 219);\
-			}\
-			else *((unsigned int*) &out2[4]) = 0;\
-			\
-			out1 += nBytes2;  out2 += nBytes2;\
-		}\
-	}\
-
-void decodeRGB(struct TheoraPixelTransform* t)
-{
-	_decodeRGB(t, t->w * 3, 3, 0, 0, 1, 2, 3, 4, 5);
-}
-
-void decodeRGBA(struct TheoraPixelTransform* t)
-{
-	_decodeRGBA(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6, 3, 7);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-//	_decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6);
-//	_decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeRGBX(struct TheoraPixelTransform* t)
-{
-	_decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6);
-}
-
-void decodeARGB(struct TheoraPixelTransform* t)
-{
-	_decodeRGBA(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7, 0, 4);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-//	_decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7);
-//	_decodeAlpha(t, t->w * 4);
-}
-
-void decodeXRGB(struct TheoraPixelTransform* t)
-{
-	_decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7);
-}
-
-void decodeBGR(struct TheoraPixelTransform* t)
-{
-	_decodeRGB(t, t->w * 3, 3, 0, 2, 1, 0, 5, 4, 3);
-}
-
-void decodeBGRA(struct TheoraPixelTransform* t)
-{
-	_decodeRGBA(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4, 3, 7);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-//	_decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4);
-//	_decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeBGRX(struct TheoraPixelTransform* t)
-{
-	_decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4);
-}
-
-void decodeABGR(struct TheoraPixelTransform* t)
-{
-	_decodeRGBA(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5, 0, 4);
-// This is the old 2-phase version, leaving it here in case more debugging is needed
-//	_decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5);
-//	_decodeAlpha(t, t->w * 4);
-}
-
-void decodeXBGR(struct TheoraPixelTransform* t)
-{
-	_decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5);
-}
-
-void initYUVConversionModule()
-{
-	//used to bring the table into the high side (scale up) so we
-	//can maintain high precision and not use floats (FIXED POINT)
-	
-	// this is the pseudocode for yuv->rgb conversion
-	//        r = 1.164*(*ySrc - 16) + 1.596*(cv - 128);
-	//        b = 1.164*(*ySrc - 16)                   + 2.018*(cu - 128);
-	//        g = 1.164*(*ySrc - 16) - 0.813*(cv - 128) - 0.391*(cu - 128);
-	
-    double scale = 1L << 13, temp;
-	
-	int i;
-	for (i = 0; i < 256; ++i)
-	{
-		temp = i - 128;
-		
-		YTable[i]  = (int)((1.164 * scale + 0.5) * (i - 16));	//Calc Y component
-		RVTable[i] = (int)((1.596 * scale + 0.5) * temp);		//Calc R component
-		GUTable[i] = (int)((0.391 * scale + 0.5) * temp);		//Calc G u & v components
-		GVTable[i] = (int)((0.813 * scale + 0.5) * temp);
-		BUTable[i] = (int)((2.018 * scale + 0.5) * temp);		//Calc B component
-	}
-}
-
-/*
- * Below are the function versions of the above macros, use those for debugging, but leave the macros for maximum CPU execution speed
- *
- *
- *
- *
-
-void _decodeRGB(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3)
-{
-	register int tmp;
-	int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;
-	unsigned int y;
-	unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
-	
-	for (y = 0; y < t->h; y += 2)
-	{
-		ySrcEven = t->y + y * t->yStride;
-		ySrcOdd  = t->y + (y + 1) * t->yStride;
-		uSrc = t->u + y * t->uStride / 2;
-		vSrc = t->v + y * t->vStride / 2;
-		out1 = t->out + y * stride;
-		out2 = t->out + (y + 1) * stride;
-		
-		for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
-		{
-			cu = *uSrc; ++uSrc;
-			cv = *vSrc; ++vSrc;
-			rV   = RVTable[cv];
-			gUV  = GUTable[cu] + GVTable[cv];
-			bU   = BUTable[cu];
-			
-			rgbY1 = YTable[*ySrcEven]; ++ySrcEven;
-			rgbY2 = YTable[*ySrcOdd];  ++ySrcOdd;
-			rgbY3 = YTable[*ySrcEven]; ++ySrcEven;
-			rgbY4 = YTable[*ySrcOdd];  ++ySrcOdd;
-			
-			CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );
-			CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);
-			CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );
-			
-			CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );
-			CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);
-			CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );
-			
-			CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );
-			CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);
-			CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );
-			
-			CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );
-			CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);
-			CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );
-			
-			out1 += nBytes2;  out2 += nBytes2;
-		}
-	}
-}
- 
-void _decodeRGBA(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3, int aindex1, int aindex2)
-{
-	register int tmp;
-	int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? t->w : maxWidth;
-	int alphaStride = t->w;
-	unsigned int y;
-	unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
-	
-	for (y = 0; y < t->h; y += 2)
-	{
-		ySrcEven = t->y + y * t->yStride;
-		ySrcOdd  = t->y + (y + 1) * t->yStride;
-		uSrc = t->u + y * t->uStride / 2;
-		vSrc = t->v + y * t->vStride / 2;
-		out1 = t->out + y * stride;
-		out2 = t->out + (y + 1) * stride;
-		
-		for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
-		{
-			cu = *uSrc; ++uSrc;
-			cv = *vSrc; ++vSrc;
-			rV   = RVTable[cv];
-			gUV  = GUTable[cu] + GVTable[cv];
-			bU   = BUTable[cu];
-			
-			rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven;
-			rgbY2 = YTable[*ySrcOdd];  a2 = ySrcOdd [alphaStride];  ++ySrcOdd;
-			rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven;
-			rgbY4 = YTable[*ySrcOdd];  a4 = ySrcOdd [alphaStride];  ++ySrcOdd;
-			
-			if (a1 >= 32)
-			{
-				CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );
-				CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);
-				CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );
-				out1[aindex1] = a1 > 224 ? 255 : a1;
-			}
-			else *((unsigned int*) out1) = 0;
-			
-			if (a2 >= 32)
-			{
-				CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );
-				CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);
-				CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );
-				out2[aindex1] = a2 > 224 ? 255 : a2;
-			}
-			else *((unsigned int*) out2) = 0;
-
-			
-			if (a3 >= 32)
-			{
-				CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );
-				CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);
-				CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );
-				out1[aindex2] = a3 > 224 ? 255 : a3;
-			}
-			else *((unsigned int*) &out1[4]) = 0;
-
-			if (a4 >= 32)
-			{
-				CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );
-				CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);
-				CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );
-				out2[aindex2] = a4 > 224 ? 255 : a4;
-			}
-			else *((unsigned int*) &out2[4]) = 0;
-
-			out1 += nBytes2;  out2 += nBytes2;
-		}
-	}
-}
-*/
-#endif
diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c
deleted file mode 100644
index fea74eca71..0000000000
--- a/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "yuv_util.h"
-
-static void _decodeYUV(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth)
-{
-	int cv, cu, y1, y2, y3, y4, width = maxWidth == 0 ? t->w : maxWidth;
-	unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;
-	unsigned int y;
-
-	for (y=0; y < t->h; y += 2)
-	{
-		ySrcEven = t->y + y * t->yStride;
-		ySrcOdd  = t->y + (y + 1) * t->yStride;
-		uSrc = t->u + y * t->uStride / 2;
-		vSrc = t->v + y * t->vStride / 2;
-		out1 = t->out + y * stride;
-		out2 = t->out + (y + 1) * stride;
-		
-		for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)
-		{
-			// EVEN columns
-			cu = *uSrc; ++uSrc;
-			cv = *vSrc; ++vSrc;
-			
-			y1 = *ySrcEven; ++ySrcEven;
-			y2 = *ySrcOdd;  ++ySrcOdd;
-			y3 = *ySrcEven; ++ySrcEven;
-			y4 = *ySrcOdd;  ++ySrcOdd;
-			
-			// EVEN columns
-			out1[0] = y1;
-			out1[1] = cu;
-			out1[2] = cv;
-			
-			out2[0] = y2;
-			out2[1] = cu;
-			out2[2] = cv;
-			
-			out1 += nBytes;  out2 += nBytes;
-			// ODD columns
-			out1[0] = y3;
-			out1[1] = cu;
-			out1[2] = cv;
-			
-			out2[0] = y4;
-			out2[1] = cu;
-			out2[2] = cv;
-			out1 += nBytes;  out2 += nBytes;
-		}
-	}
-}
-
-void decodeYUV(struct TheoraPixelTransform* t)
-{
-	_decodeYUV(t, t->w * 3, 3, 0);
-}
-
-void decodeYUVA(struct TheoraPixelTransform* t)
-{
-	_decodeYUV(t, t->w * 4, 4, 0);
-	_decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeYUVX(struct TheoraPixelTransform* t)
-{
-	_decodeYUV(t, t->w * 4, 4, 0);
-}
-
-void decodeAYUV(struct TheoraPixelTransform* t)
-{
-	_decodeYUV(incOut(t, 1), t->w * 4, 4, 0);
-	_decodeAlpha(t, t->w * 4);
-}
-
-void decodeXYUV(struct TheoraPixelTransform* t)
-{
-	_decodeYUV(incOut(t, 1), t->w * 4, 4, 0);
-}
-
diff --git a/drivers/theoraplayer/src/YUV/android/cpu-features.c b/drivers/theoraplayer/src/YUV/android/cpu-features.c
deleted file mode 100644
index 623dc94e0e..0000000000
--- a/drivers/theoraplayer/src/YUV/android/cpu-features.c
+++ /dev/null
@@ -1,1095 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/* ChangeLog for this library:
- *
- * NDK r8d: Add android_setCpu().
- *
- * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
- *          VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
- *
- *          Rewrite the code to parse /proc/self/auxv instead of
- *          the "Features" field in /proc/cpuinfo.
- *
- *          Dynamically allocate the buffer that hold the content
- *          of /proc/cpuinfo to deal with newer hardware.
- *
- * NDK r7c: Fix CPU count computation. The old method only reported the
- *           number of _active_ CPUs when the library was initialized,
- *           which could be less than the real total.
- *
- * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7
- *         for an ARMv6 CPU (see below).
- *
- *         Handle kernels that only report 'neon', and not 'vfpv3'
- *         (VFPv3 is mandated by the ARM architecture is Neon is implemented)
- *
- *         Handle kernels that only report 'vfpv3d16', and not 'vfpv3'
- *
- *         Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in
- *         android_getCpuFamily().
- *
- * NDK r4: Initial release
- */
-
-#if 0
-
-#ifdef _ANDROID
-#if defined(__le32__)
-
-// When users enter this, we should only provide interface and
-// libportable will give the implementations.
-
-#else // !__le32__
-
-#include <sys/system_properties.h>
-#include <pthread.h>
-#include "cpu-features.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <errno.h>
-
-static  pthread_once_t     g_once;
-static  int                g_inited;
-static  AndroidCpuFamily   g_cpuFamily;
-static  uint64_t           g_cpuFeatures;
-static  int                g_cpuCount;
-
-#ifdef __arm__
-static  uint32_t           g_cpuIdArm;
-#endif
-
-static const int  android_cpufeatures_debug = 0;
-
-#ifdef __arm__
-#  define DEFAULT_CPU_FAMILY  ANDROID_CPU_FAMILY_ARM
-#elif defined __i386__
-#  define DEFAULT_CPU_FAMILY  ANDROID_CPU_FAMILY_X86
-#else
-#  define DEFAULT_CPU_FAMILY  ANDROID_CPU_FAMILY_UNKNOWN
-#endif
-
-#define  D(...) \
-    do { \
-        if (android_cpufeatures_debug) { \
-            printf(__VA_ARGS__); fflush(stdout); \
-        } \
-    } while (0)
-
-#ifdef __i386__
-static __inline__ void x86_cpuid(int func, int values[4])
-{
-    int a, b, c, d;
-    /* We need to preserve ebx since we're compiling PIC code */
-    /* this means we can't use "=b" for the second output register */
-    __asm__ __volatile__ ( \
-      "push %%ebx\n"
-      "cpuid\n" \
-      "mov %%ebx, %1\n"
-      "pop %%ebx\n"
-      : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
-      : "a" (func) \
-    );
-    values[0] = a;
-    values[1] = b;
-    values[2] = c;
-    values[3] = d;
-}
-#endif
-
-/* Get the size of a file by reading it until the end. This is needed
- * because files under /proc do not always return a valid size when
- * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
- */
-static int
-get_file_size(const char* pathname)
-{
-    int fd, ret, result = 0;
-    char buffer[256];
-
-    fd = open(pathname, O_RDONLY);
-    if (fd < 0) {
-        D("Can't open %s: %s\n", pathname, strerror(errno));
-        return -1;
-    }
-
-    for (;;) {
-        int ret = read(fd, buffer, sizeof buffer);
-        if (ret < 0) {
-            if (errno == EINTR)
-                continue;
-            D("Error while reading %s: %s\n", pathname, strerror(errno));
-            break;
-        }
-        if (ret == 0)
-            break;
-
-        result += ret;
-    }
-    close(fd);
-    return result;
-}
-
-/* Read the content of /proc/cpuinfo into a user-provided buffer.
- * Return the length of the data, or -1 on error. Does *not*
- * zero-terminate the content. Will not read more
- * than 'buffsize' bytes.
- */
-static int
-read_file(const char*  pathname, char*  buffer, size_t  buffsize)
-{
-    int  fd, count;
-
-    fd = open(pathname, O_RDONLY);
-    if (fd < 0) {
-        D("Could not open %s: %s\n", pathname, strerror(errno));
-        return -1;
-    }
-    count = 0;
-    while (count < (int)buffsize) {
-        int ret = read(fd, buffer + count, buffsize - count);
-        if (ret < 0) {
-            if (errno == EINTR)
-                continue;
-            D("Error while reading from %s: %s\n", pathname, strerror(errno));
-            if (count == 0)
-                count = -1;
-            break;
-        }
-        if (ret == 0)
-            break;
-        count += ret;
-    }
-    close(fd);
-    return count;
-}
-
-/* Extract the content of a the first occurence of a given field in
- * the content of /proc/cpuinfo and return it as a heap-allocated
- * string that must be freed by the caller.
- *
- * Return NULL if not found
- */
-static char*
-extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
-{
-    int  fieldlen = strlen(field);
-    const char* bufend = buffer + buflen;
-    char* result = NULL;
-    int len, ignore;
-    const char *p, *q;
-
-    /* Look for first field occurence, and ensures it starts the line. */
-    p = buffer;
-    for (;;) {
-        p = memmem(p, bufend-p, field, fieldlen);
-        if (p == NULL)
-            goto EXIT;
-
-        if (p == buffer || p[-1] == '\n')
-            break;
-
-        p += fieldlen;
-    }
-
-    /* Skip to the first column followed by a space */
-    p += fieldlen;
-    p  = memchr(p, ':', bufend-p);
-    if (p == NULL || p[1] != ' ')
-        goto EXIT;
-
-    /* Find the end of the line */
-    p += 2;
-    q = memchr(p, '\n', bufend-p);
-    if (q == NULL)
-        q = bufend;
-
-    /* Copy the line into a heap-allocated buffer */
-    len = q-p;
-    result = malloc(len+1);
-    if (result == NULL)
-        goto EXIT;
-
-    memcpy(result, p, len);
-    result[len] = '\0';
-
-EXIT:
-    return result;
-}
-
-/* Checks that a space-separated list of items contains one given 'item'.
- * Returns 1 if found, 0 otherwise.
- */
-static int
-has_list_item(const char* list, const char* item)
-{
-    const char*  p = list;
-    int itemlen = strlen(item);
-
-    if (list == NULL)
-        return 0;
-
-    while (*p) {
-        const char*  q;
-
-        /* skip spaces */
-        while (*p == ' ' || *p == '\t')
-            p++;
-
-        /* find end of current list item */
-        q = p;
-        while (*q && *q != ' ' && *q != '\t')
-            q++;
-
-        if (itemlen == q-p && !memcmp(p, item, itemlen))
-            return 1;
-
-        /* skip to next item */
-        p = q;
-    }
-    return 0;
-}
-
-/* Parse a number starting from 'input', but not going further
- * than 'limit'. Return the value into '*result'.
- *
- * NOTE: Does not skip over leading spaces, or deal with sign characters.
- * NOTE: Ignores overflows.
- *
- * The function returns NULL in case of error (bad format), or the new
- * position after the decimal number in case of success (which will always
- * be <= 'limit').
- */
-static const char*
-parse_number(const char* input, const char* limit, int base, int* result)
-{
-    const char* p = input;
-    int val = 0;
-    while (p < limit) {
-        int d = (*p - '0');
-        if ((unsigned)d >= 10U) {
-            d = (*p - 'a');
-            if ((unsigned)d >= 6U)
-              d = (*p - 'A');
-            if ((unsigned)d >= 6U)
-              break;
-            d += 10;
-        }
-        if (d >= base)
-          break;
-        val = val*base + d;
-        p++;
-    }
-    if (p == input)
-        return NULL;
-
-    *result = val;
-    return p;
-}
-
-static const char*
-parse_decimal(const char* input, const char* limit, int* result)
-{
-    return parse_number(input, limit, 10, result);
-}
-
-static const char*
-parse_hexadecimal(const char* input, const char* limit, int* result)
-{
-    return parse_number(input, limit, 16, result);
-}
-
-/* This small data type is used to represent a CPU list / mask, as read
- * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt
- *
- * For now, we don't expect more than 32 cores on mobile devices, so keep
- * everything simple.
- */
-typedef struct {
-    uint32_t mask;
-} CpuList;
-
-static __inline__ void
-cpulist_init(CpuList* list) {
-    list->mask = 0;
-}
-
-static __inline__ void
-cpulist_and(CpuList* list1, CpuList* list2) {
-    list1->mask &= list2->mask;
-}
-
-static __inline__ void
-cpulist_set(CpuList* list, int index) {
-    if ((unsigned)index < 32) {
-        list->mask |= (uint32_t)(1U << index);
-    }
-}
-
-static __inline__ int
-cpulist_count(CpuList* list) {
-    return __builtin_popcount(list->mask);
-}
-
-/* Parse a textual list of cpus and store the result inside a CpuList object.
- * Input format is the following:
- * - comma-separated list of items (no spaces)
- * - each item is either a single decimal number (cpu index), or a range made
- *   of two numbers separated by a single dash (-). Ranges are inclusive.
- *
- * Examples:   0
- *             2,4-127,128-143
- *             0-1
- */
-static void
-cpulist_parse(CpuList* list, const char* line, int line_len)
-{
-    const char* p = line;
-    const char* end = p + line_len;
-    const char* q;
-
-    /* NOTE: the input line coming from sysfs typically contains a
-     * trailing newline, so take care of it in the code below
-     */
-    while (p < end && *p != '\n')
-    {
-        int val, start_value, end_value;
-
-        /* Find the end of current item, and put it into 'q' */
-        q = memchr(p, ',', end-p);
-        if (q == NULL) {
-            q = end;
-        }
-
-        /* Get first value */
-        p = parse_decimal(p, q, &start_value);
-        if (p == NULL)
-            goto BAD_FORMAT;
-
-        end_value = start_value;
-
-        /* If we're not at the end of the item, expect a dash and
-         * and integer; extract end value.
-         */
-        if (p < q && *p == '-') {
-            p = parse_decimal(p+1, q, &end_value);
-            if (p == NULL)
-                goto BAD_FORMAT;
-        }
-
-        /* Set bits CPU list bits */
-        for (val = start_value; val <= end_value; val++) {
-            cpulist_set(list, val);
-        }
-
-        /* Jump to next item */
-        p = q;
-        if (p < end)
-            p++;
-    }
-
-BAD_FORMAT:
-    ;
-}
-
-/* Read a CPU list from one sysfs file */
-static void
-cpulist_read_from(CpuList* list, const char* filename)
-{
-    char   file[64];
-    int    filelen;
-
-    cpulist_init(list);
-
-    filelen = read_file(filename, file, sizeof file);
-    if (filelen < 0) {
-        D("Could not read %s: %s\n", filename, strerror(errno));
-        return;
-    }
-
-    cpulist_parse(list, file, filelen);
-}
-
-// See <asm/hwcap.h> kernel header.
-#define HWCAP_VFP       (1 << 6)
-#define HWCAP_IWMMXT    (1 << 9)
-#define HWCAP_NEON      (1 << 12)
-#define HWCAP_VFPv3     (1 << 13)
-#define HWCAP_VFPv3D16  (1 << 14)
-#define HWCAP_VFPv4     (1 << 16)
-#define HWCAP_IDIVA     (1 << 17)
-#define HWCAP_IDIVT     (1 << 18)
-
-#define AT_HWCAP 16
-
-#if defined(__arm__)
-/* Compute the ELF HWCAP flags.
- */
-static uint32_t
-get_elf_hwcap(const char* cpuinfo, int cpuinfo_len)
-{
-  /* IMPORTANT:
-   *   Accessing /proc/self/auxv doesn't work anymore on all
-   *   platform versions. More specifically, when running inside
-   *   a regular application process, most of /proc/self/ will be
-   *   non-readable, including /proc/self/auxv. This doesn't
-   *   happen however if the application is debuggable, or when
-   *   running under the "shell" UID, which is why this was not
-   *   detected appropriately.
-   */
-#if 0
-    uint32_t result = 0;
-    const char filepath[] = "/proc/self/auxv";
-    int fd = open(filepath, O_RDONLY);
-    if (fd < 0) {
-        D("Could not open %s: %s\n", filepath, strerror(errno));
-        return 0;
-    }
-
-    struct { uint32_t tag; uint32_t value; } entry;
-
-    for (;;) {
-        int ret = read(fd, (char*)&entry, sizeof entry);
-        if (ret < 0) {
-            if (errno == EINTR)
-                continue;
-            D("Error while reading %s: %s\n", filepath, strerror(errno));
-            break;
-        }
-        // Detect end of list.
-        if (ret == 0 || (entry.tag == 0 && entry.value == 0))
-          break;
-        if (entry.tag == AT_HWCAP) {
-          result = entry.value;
-          break;
-        }
-    }
-    close(fd);
-    return result;
-#else
-    // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag.
-    uint32_t hwcaps = 0;
-
-    char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
-
-    if (cpuFeatures != NULL) {
-        D("Found cpuFeatures = '%s'\n", cpuFeatures);
-
-        if (has_list_item(cpuFeatures, "vfp"))
-            hwcaps |= HWCAP_VFP;
-        if (has_list_item(cpuFeatures, "vfpv3"))
-            hwcaps |= HWCAP_VFPv3;
-        if (has_list_item(cpuFeatures, "vfpv3d16"))
-            hwcaps |= HWCAP_VFPv3D16;
-        if (has_list_item(cpuFeatures, "vfpv4"))
-            hwcaps |= HWCAP_VFPv4;
-        if (has_list_item(cpuFeatures, "neon"))
-            hwcaps |= HWCAP_NEON;
-        if (has_list_item(cpuFeatures, "idiva"))
-            hwcaps |= HWCAP_IDIVA;
-        if (has_list_item(cpuFeatures, "idivt"))
-            hwcaps |= HWCAP_IDIVT;
-        if (has_list_item(cpuFeatures, "idiv"))
-            hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;
-        if (has_list_item(cpuFeatures, "iwmmxt"))
-            hwcaps |= HWCAP_IWMMXT;
-
-        free(cpuFeatures);
-    }
-    return hwcaps;
-#endif
-}
-#endif  /* __arm__ */
-
-/* Return the number of cpus present on a given device.
- *
- * To handle all weird kernel configurations, we need to compute the
- * intersection of the 'present' and 'possible' CPU lists and count
- * the result.
- */
-static int
-get_cpu_count(void)
-{
-    CpuList cpus_present[1];
-    CpuList cpus_possible[1];
-
-    cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");
-    cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");
-
-    /* Compute the intersection of both sets to get the actual number of
-     * CPU cores that can be used on this device by the kernel.
-     */
-    cpulist_and(cpus_present, cpus_possible);
-
-    return cpulist_count(cpus_present);
-}
-
-static void
-android_cpuInitFamily(void)
-{
-#if defined(__arm__)
-    g_cpuFamily = ANDROID_CPU_FAMILY_ARM;
-#elif defined(__i386__)
-    g_cpuFamily = ANDROID_CPU_FAMILY_X86;
-#elif defined(__mips64)
-/* Needs to be before __mips__ since the compiler defines both */
-    g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;
-#elif defined(__mips__)
-    g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;
-#elif defined(__aarch64__)
-    g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;
-#elif defined(__x86_64__)
-    g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;
-#else
-    g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
-#endif
-}
-
-static void
-android_cpuInit(void)
-{
-    char* cpuinfo = NULL;
-    int   cpuinfo_len;
-
-    android_cpuInitFamily();
-
-    g_cpuFeatures = 0;
-    g_cpuCount    = 1;
-    g_inited      = 1;
-
-    cpuinfo_len = get_file_size("/proc/cpuinfo");
-    if (cpuinfo_len < 0) {
-      D("cpuinfo_len cannot be computed!");
-      return;
-    }
-    cpuinfo = malloc(cpuinfo_len);
-    if (cpuinfo == NULL) {
-      D("cpuinfo buffer could not be allocated");
-      return;
-    }
-    cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
-    D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,
-      cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);
-
-    if (cpuinfo_len < 0)  /* should not happen */ {
-        free(cpuinfo);
-        return;
-    }
-
-    /* Count the CPU cores, the value may be 0 for single-core CPUs */
-    g_cpuCount = get_cpu_count();
-    if (g_cpuCount == 0) {
-        g_cpuCount = 1;
-    }
-
-    D("found cpuCount = %d\n", g_cpuCount);
-
-#ifdef __arm__
-    {
-        char*  features = NULL;
-        char*  architecture = NULL;
-
-        /* Extract architecture from the "CPU Architecture" field.
-         * The list is well-known, unlike the the output of
-         * the 'Processor' field which can vary greatly.
-         *
-         * See the definition of the 'proc_arch' array in
-         * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in
-         * same file.
-         */
-        char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
-
-        if (cpuArch != NULL) {
-            char*  end;
-            long   archNumber;
-            int    hasARMv7 = 0;
-
-            D("found cpuArch = '%s'\n", cpuArch);
-
-            /* read the initial decimal number, ignore the rest */
-            archNumber = strtol(cpuArch, &end, 10);
-
-            /* Here we assume that ARMv8 will be upwards compatible with v7
-             * in the future. Unfortunately, there is no 'Features' field to
-             * indicate that Thumb-2 is supported.
-             */
-            if (end > cpuArch && archNumber >= 7) {
-                hasARMv7 = 1;
-            }
-
-            /* Unfortunately, it seems that certain ARMv6-based CPUs
-             * report an incorrect architecture number of 7!
-             *
-             * See http://code.google.com/p/android/issues/detail?id=10812
-             *
-             * We try to correct this by looking at the 'elf_format'
-             * field reported by the 'Processor' field, which is of the
-             * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
-             * an ARMv6-one.
-             */
-            if (hasARMv7) {
-                char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,
-                                                      "Processor");
-                if (cpuProc != NULL) {
-                    D("found cpuProc = '%s'\n", cpuProc);
-                    if (has_list_item(cpuProc, "(v6l)")) {
-                        D("CPU processor and architecture mismatch!!\n");
-                        hasARMv7 = 0;
-                    }
-                    free(cpuProc);
-                }
-            }
-
-            if (hasARMv7) {
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;
-            }
-
-            /* The LDREX / STREX instructions are available from ARMv6 */
-            if (archNumber >= 6) {
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;
-            }
-
-            free(cpuArch);
-        }
-
-        /* Extract the list of CPU features from ELF hwcaps */
-        uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len);
-
-        if (hwcaps != 0) {
-            int has_vfp = (hwcaps & HWCAP_VFP);
-            int has_vfpv3 = (hwcaps & HWCAP_VFPv3);
-            int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);
-            int has_vfpv4 = (hwcaps & HWCAP_VFPv4);
-            int has_neon = (hwcaps & HWCAP_NEON);
-            int has_idiva = (hwcaps & HWCAP_IDIVA);
-            int has_idivt = (hwcaps & HWCAP_IDIVT);
-            int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);
-
-            // The kernel does a poor job at ensuring consistency when
-            // describing CPU features. So lots of guessing is needed.
-
-            // 'vfpv4' implies VFPv3|VFP_FMA|FP16
-            if (has_vfpv4)
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3    |
-                                 ANDROID_CPU_ARM_FEATURE_VFP_FP16 |
-                                 ANDROID_CPU_ARM_FEATURE_VFP_FMA;
-
-            // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,
-            // a value of 'vfpv3' doesn't necessarily mean that the D32
-            // feature is present, so be conservative. All CPUs in the
-            // field that support D32 also support NEON, so this should
-            // not be a problem in practice.
-            if (has_vfpv3 || has_vfpv3d16)
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
-
-            // 'vfp' is super ambiguous. Depending on the kernel, it can
-            // either mean VFPv2 or VFPv3. Make it depend on ARMv7.
-            if (has_vfp) {
-              if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)
-                  g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
-              else
-                  g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;
-            }
-
-            // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA
-            if (has_neon) {
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
-                                 ANDROID_CPU_ARM_FEATURE_NEON |
-                                 ANDROID_CPU_ARM_FEATURE_VFP_D32;
-              if (has_vfpv4)
-                  g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;
-            }
-
-            // VFPv3 implies VFPv2 and ARMv7
-            if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |
-                                 ANDROID_CPU_ARM_FEATURE_ARMv7;
-
-            if (has_idiva)
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
-            if (has_idivt)
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;
-
-            if (has_iwmmxt)
-                g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;
-        }
-
-        /* Extract the cpuid value from various fields */
-        // The CPUID value is broken up in several entries in /proc/cpuinfo.
-        // This table is used to rebuild it from the entries.
-        static const struct CpuIdEntry {
-            const char* field;
-            char        format;
-            char        bit_lshift;
-            char        bit_length;
-        } cpu_id_entries[] = {
-            { "CPU implementer", 'x', 24, 8 },
-            { "CPU variant", 'x', 20, 4 },
-            { "CPU part", 'x', 4, 12 },
-            { "CPU revision", 'd', 0, 4 },
-        };
-        size_t i;
-        D("Parsing /proc/cpuinfo to recover CPUID\n");
-        for (i = 0;
-             i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]);
-             ++i) {
-            const struct CpuIdEntry* entry = &cpu_id_entries[i];
-            char* value = extract_cpuinfo_field(cpuinfo,
-                                                cpuinfo_len,
-                                                entry->field);
-            if (value == NULL)
-                continue;
-
-            D("field=%s value='%s'\n", entry->field, value);
-            char* value_end = value + strlen(value);
-            int val = 0;
-            const char* start = value;
-            const char* p;
-            if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) {
-              start += 2;
-              p = parse_hexadecimal(start, value_end, &val);
-            } else if (entry->format == 'x')
-              p = parse_hexadecimal(value, value_end, &val);
-            else
-              p = parse_decimal(value, value_end, &val);
-
-            if (p > (const char*)start) {
-              val &= ((1 << entry->bit_length)-1);
-              val <<= entry->bit_lshift;
-              g_cpuIdArm |= (uint32_t) val;
-            }
-
-            free(value);
-        }
-
-        // Handle kernel configuration bugs that prevent the correct
-        // reporting of CPU features.
-        static const struct CpuFix {
-            uint32_t  cpuid;
-            uint64_t  or_flags;
-        } cpu_fixes[] = {
-            /* The Nexus 4 (Qualcomm Krait) kernel configuration
-             * forgets to report IDIV support. */
-            { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
-                          ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
-            { 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
-                          ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
-        };
-        size_t n;
-        for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) {
-            const struct CpuFix* entry = &cpu_fixes[n];
-
-            if (g_cpuIdArm == entry->cpuid)
-                g_cpuFeatures |= entry->or_flags;
-        }
-
-    }
-#endif /* __arm__ */
-
-#ifdef __i386__
-    int regs[4];
-
-/* According to http://en.wikipedia.org/wiki/CPUID */
-#define VENDOR_INTEL_b  0x756e6547
-#define VENDOR_INTEL_c  0x6c65746e
-#define VENDOR_INTEL_d  0x49656e69
-
-    x86_cpuid(0, regs);
-    int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&
-                         regs[2] == VENDOR_INTEL_c &&
-                         regs[3] == VENDOR_INTEL_d);
-
-    x86_cpuid(1, regs);
-    if ((regs[2] & (1 << 9)) != 0) {
-        g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;
-    }
-    if ((regs[2] & (1 << 23)) != 0) {
-        g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;
-    }
-    if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) {
-        g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;
-    }
-#endif
-
-    free(cpuinfo);
-}
-
-
-AndroidCpuFamily
-android_getCpuFamily(void)
-{
-    pthread_once(&g_once, android_cpuInit);
-    return g_cpuFamily;
-}
-
-
-uint64_t
-android_getCpuFeaturesExt(void)
-{
-    pthread_once(&g_once, android_cpuInit);
-    return g_cpuFeatures;
-}
-
-
-int
-android_getCpuCount(void)
-{
-    pthread_once(&g_once, android_cpuInit);
-    return g_cpuCount;
-}
-
-static void
-android_cpuInitDummy(void)
-{
-    g_inited = 1;
-}
-
-int
-android_setCpu(int cpu_count, uint64_t cpu_features)
-{
-    /* Fail if the library was already initialized. */
-    if (g_inited)
-        return 0;
-
-    android_cpuInitFamily();
-    g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count);
-    g_cpuFeatures = cpu_features;
-    pthread_once(&g_once, android_cpuInitDummy);
-
-    return 1;
-}
-
-#ifdef __arm__
-uint32_t
-android_getCpuIdArm(void)
-{
-    pthread_once(&g_once, android_cpuInit);
-    return g_cpuIdArm;
-}
-
-int
-android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)
-{
-    if (!android_setCpu(cpu_count, cpu_features))
-        return 0;
-
-    g_cpuIdArm = cpu_id;
-    return 1;
-}
-#endif  /* __arm__ */
-
-/*
- * Technical note: Making sense of ARM's FPU architecture versions.
- *
- * FPA was ARM's first attempt at an FPU architecture. There is no Android
- * device that actually uses it since this technology was already obsolete
- * when the project started. If you see references to FPA instructions
- * somewhere, you can be sure that this doesn't apply to Android at all.
- *
- * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of
- * new versions / additions to it. ARM considers this obsolete right now,
- * and no known Android device implements it either.
- *
- * VFPv2 added a few instructions to VFPv1, and is an *optional* extension
- * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
- * supporting the 'armeabi' ABI doesn't necessarily support these.
- *
- * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
- * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated
- * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
- * that it provides 16 double-precision FPU registers (d0-d15) and 32
- * single-precision ones (s0-s31) which happen to be mapped to the same
- * register banks.
- *
- * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
- * additional double precision registers (d16-d31). Note that there are
- * still only 32 single precision registers.
- *
- * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
- * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
- * are not supported by Android. Note that it is not compatible with VFPv2.
- *
- * NOTE: The term 'VFPv3' usually designate either VFPv3-D16 or VFPv3-D32
- *       depending on context. For example GCC uses it for VFPv3-D32, but
- *       the Linux kernel code uses it for VFPv3-D16 (especially in
- *       /proc/cpuinfo). Always try to use the full designation when
- *       possible.
- *
- * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
- * instructions to perform parallel computations on vectors of 8, 16,
- * 32, 64 and 128 bit quantities. NEON requires VFPv32-D32 since all
- * NEON registers are also mapped to the same register banks.
- *
- * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to
- * perform fused multiply-accumulate on VFP registers, as well as
- * half-precision (16-bit) conversion operations.
- *
- * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
- * registers.
- *
- * VPFv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
- * multiply-accumulate instructions that work on the NEON registers.
- *
- * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
- *       depending on context.
- *
- * The following information was determined by scanning the binutils-2.22
- * sources:
- *
- * Basic VFP instruction subsets:
- *
- * #define FPU_VFP_EXT_V1xD 0x08000000     // Base VFP instruction set.
- * #define FPU_VFP_EXT_V1   0x04000000     // Double-precision insns.
- * #define FPU_VFP_EXT_V2   0x02000000     // ARM10E VFPr1.
- * #define FPU_VFP_EXT_V3xD 0x01000000     // VFPv3 single-precision.
- * #define FPU_VFP_EXT_V3   0x00800000     // VFPv3 double-precision.
- * #define FPU_NEON_EXT_V1  0x00400000     // Neon (SIMD) insns.
- * #define FPU_VFP_EXT_D32  0x00200000     // Registers D16-D31.
- * #define FPU_VFP_EXT_FP16 0x00100000     // Half-precision extensions.
- * #define FPU_NEON_EXT_FMA 0x00080000     // Neon fused multiply-add
- * #define FPU_VFP_EXT_FMA  0x00040000     // VFP fused multiply-add
- *
- * FPU types (excluding NEON)
- *
- * FPU_VFP_V1xD (EXT_V1xD)
- *    |
- *    +--------------------------+
- *    |                          |
- * FPU_VFP_V1 (+EXT_V1)       FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
- *    |                          |
- *    |                          |
- * FPU_VFP_V2 (+EXT_V2)       FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
- *    |
- * FPU_VFP_V3D16 (+EXT_Vx3D+EXT_V3)
- *    |
- *    +--------------------------+
- *    |                          |
- * FPU_VFP_V3 (+EXT_D32)     FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
- *    |                          |
- *    |                      FPU_VFP_V4 (+EXT_D32)
- *    |
- * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
- *
- * VFP architectures:
- *
- * ARCH_VFP_V1xD  (EXT_V1xD)
- *   |
- *   +------------------+
- *   |                  |
- *   |             ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
- *   |                  |
- *   |             ARCH_VFP_V3xD_FP16 (+EXT_FP16)
- *   |                  |
- *   |             ARCH_VFP_V4_SP_D16 (+EXT_FMA)
- *   |
- * ARCH_VFP_V1 (+EXT_V1)
- *   |
- * ARCH_VFP_V2 (+EXT_V2)
- *   |
- * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
- *   |
- *   +-------------------+
- *   |                   |
- *   |         ARCH_VFP_V3D16_FP16  (+EXT_FP16)
- *   |
- *   +-------------------+
- *   |                   |
- *   |         ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
- *   |                   |
- *   |         ARCH_VFP_V4 (+EXT_D32)
- *   |                   |
- *   |         ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
- *   |
- * ARCH_VFP_V3 (+EXT_D32)
- *   |
- *   +-------------------+
- *   |                   |
- *   |         ARCH_VFP_V3_FP16 (+EXT_FP16)
- *   |
- * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
- *   |
- * ARCH_NEON_FP16 (+EXT_FP16)
- *
- * -fpu=<name> values and their correspondance with FPU architectures above:
- *
- *   {"vfp",               FPU_ARCH_VFP_V2},
- *   {"vfp9",              FPU_ARCH_VFP_V2},
- *   {"vfp3",              FPU_ARCH_VFP_V3}, // For backwards compatbility.
- *   {"vfp10",             FPU_ARCH_VFP_V2},
- *   {"vfp10-r0",          FPU_ARCH_VFP_V1},
- *   {"vfpxd",             FPU_ARCH_VFP_V1xD},
- *   {"vfpv2",             FPU_ARCH_VFP_V2},
- *   {"vfpv3",             FPU_ARCH_VFP_V3},
- *   {"vfpv3-fp16",        FPU_ARCH_VFP_V3_FP16},
- *   {"vfpv3-d16",         FPU_ARCH_VFP_V3D16},
- *   {"vfpv3-d16-fp16",    FPU_ARCH_VFP_V3D16_FP16},
- *   {"vfpv3xd",           FPU_ARCH_VFP_V3xD},
- *   {"vfpv3xd-fp16",      FPU_ARCH_VFP_V3xD_FP16},
- *   {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
- *   {"neon-fp16",         FPU_ARCH_NEON_FP16},
- *   {"vfpv4",             FPU_ARCH_VFP_V4},
- *   {"vfpv4-d16",         FPU_ARCH_VFP_V4D16},
- *   {"fpv4-sp-d16",       FPU_ARCH_VFP_V4_SP_D16},
- *   {"neon-vfpv4",        FPU_ARCH_NEON_VFP_V4},
- *
- *
- * Simplified diagram that only includes FPUs supported by Android:
- * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
- * all others are optional and must be probed at runtime.
- *
- * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
- *   |
- *   +-------------------+
- *   |                   |
- *   |         ARCH_VFP_V3D16_FP16  (+EXT_FP16)
- *   |
- *   +-------------------+
- *   |                   |
- *   |         ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
- *   |                   |
- *   |         ARCH_VFP_V4 (+EXT_D32)
- *   |                   |
- *   |         ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
- *   |
- * ARCH_VFP_V3 (+EXT_D32)
- *   |
- *   +-------------------+
- *   |                   |
- *   |         ARCH_VFP_V3_FP16 (+EXT_FP16)
- *   |
- * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
- *   |
- * ARCH_NEON_FP16 (+EXT_FP16)
- *
- */
-
-#endif // defined(__le32__)
-#endif
-
-#endif
diff --git a/drivers/theoraplayer/src/YUV/android/cpu-features.h b/drivers/theoraplayer/src/YUV/android/cpu-features.h
deleted file mode 100644
index 12d3ad5645..0000000000
--- a/drivers/theoraplayer/src/YUV/android/cpu-features.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#ifndef CPU_FEATURES_H
-#define CPU_FEATURES_H
-
-#include <sys/cdefs.h>
-#include <stdint.h>
-
-__BEGIN_DECLS
-
-typedef enum {
-    ANDROID_CPU_FAMILY_UNKNOWN = 0,
-    ANDROID_CPU_FAMILY_ARM,
-    ANDROID_CPU_FAMILY_X86,
-    ANDROID_CPU_FAMILY_MIPS,
-
-    ANDROID_CPU_FAMILY_MAX  /* do not remove */
-
-} AndroidCpuFamily;
-
-/* Return family of the device's CPU */
-extern AndroidCpuFamily   android_getCpuFamily(void);
-
-/* The list of feature flags for ARM CPUs that can be recognized by the
- * library. Value details are:
- *
- *   VFPv2:
- *     CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs
- *     support these instructions. VFPv2 is a subset of VFPv3 so this will
- *     be set whenever VFPv3 is set too.
- *
- *   ARMv7:
- *     CPU supports the ARMv7-A basic instruction set.
- *     This feature is mandated by the 'armeabi-v7a' ABI.
- *
- *   VFPv3:
- *     CPU supports the VFPv3-D16 instruction set, providing hardware FPU
- *     support for single and double precision floating point registers.
- *     Note that only 16 FPU registers are available by default, unless
- *     the D32 bit is set too. This feature is also mandated by the
- *     'armeabi-v7a' ABI.
- *
- *   VFP_D32:
- *     CPU VFP optional extension that provides 32 FPU registers,
- *     instead of 16. Note that ARM mandates this feature is the 'NEON'
- *     feature is implemented by the CPU.
- *
- *   NEON:
- *     CPU FPU supports "ARM Advanced SIMD" instructions, also known as
- *     NEON. Note that this mandates the VFP_D32 feature as well, per the
- *     ARM Architecture specification.
- *
- *   VFP_FP16:
- *     Half-width floating precision VFP extension. If set, the CPU
- *     supports instructions to perform floating-point operations on
- *     16-bit registers. This is part of the VFPv4 specification, but
- *     not mandated by any Android ABI.
- *
- *   VFP_FMA:
- *     Fused multiply-accumulate VFP instructions extension. Also part of
- *     the VFPv4 specification, but not mandated by any Android ABI.
- *
- *   NEON_FMA:
- *     Fused multiply-accumulate NEON instructions extension. Optional
- *     extension from the VFPv4 specification, but not mandated by any
- *     Android ABI.
- *
- *   IDIV_ARM:
- *     Integer division available in ARM mode. Only available
- *     on recent CPUs (e.g. Cortex-A15).
- *
- *   IDIV_THUMB2:
- *     Integer division available in Thumb-2 mode. Only available
- *     on recent CPUs (e.g. Cortex-A15).
- *
- *   iWMMXt:
- *     Optional extension that adds MMX registers and operations to an
- *     ARM CPU. This is only available on a few XScale-based CPU designs
- *     sold by Marvell. Pretty rare in practice.
- *
- * If you want to tell the compiler to generate code that targets one of
- * the feature set above, you should probably use one of the following
- * flags (for more details, see technical note at the end of this file):
- *
- *   -mfpu=vfp
- *   -mfpu=vfpv2
- *     These are equivalent and tell GCC to use VFPv2 instructions for
- *     floating-point operations. Use this if you want your code to
- *     run on *some* ARMv6 devices, and any ARMv7-A device supported
- *     by Android.
- *
- *     Generated code requires VFPv2 feature.
- *
- *   -mfpu=vfpv3-d16
- *     Tell GCC to use VFPv3 instructions (using only 16 FPU registers).
- *     This should be generic code that runs on any CPU that supports the
- *     'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this.
- *
- *     Generated code requires VFPv3 feature.
- *
- *   -mfpu=vfpv3
- *     Tell GCC to use VFPv3 instructions with 32 FPU registers.
- *     Generated code requires VFPv3|VFP_D32 features.
- *
- *   -mfpu=neon
- *     Tell GCC to use VFPv3 instructions with 32 FPU registers, and
- *     also support NEON intrinsics (see <arm_neon.h>).
- *     Generated code requires VFPv3|VFP_D32|NEON features.
- *
- *   -mfpu=vfpv4-d16
- *     Generated code requires VFPv3|VFP_FP16|VFP_FMA features.
- *
- *   -mfpu=vfpv4
- *     Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features.
- *
- *   -mfpu=neon-vfpv4
- *     Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA
- *     features.
- *
- *   -mcpu=cortex-a7
- *   -mcpu=cortex-a15
- *     Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|
- *                             NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2
- *     This flag implies -mfpu=neon-vfpv4.
- *
- *   -mcpu=iwmmxt
- *     Allows the use of iWMMXt instrinsics with GCC.
- */
-enum {
-    ANDROID_CPU_ARM_FEATURE_ARMv7       = (1 << 0),
-    ANDROID_CPU_ARM_FEATURE_VFPv3       = (1 << 1),
-    ANDROID_CPU_ARM_FEATURE_NEON        = (1 << 2),
-    ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3),
-    ANDROID_CPU_ARM_FEATURE_VFPv2       = (1 << 4),
-    ANDROID_CPU_ARM_FEATURE_VFP_D32     = (1 << 5),
-    ANDROID_CPU_ARM_FEATURE_VFP_FP16    = (1 << 6),
-    ANDROID_CPU_ARM_FEATURE_VFP_FMA     = (1 << 7),
-    ANDROID_CPU_ARM_FEATURE_NEON_FMA    = (1 << 8),
-    ANDROID_CPU_ARM_FEATURE_IDIV_ARM    = (1 << 9),
-    ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10),
-    ANDROID_CPU_ARM_FEATURE_iWMMXt      = (1 << 11),
-};
-
-enum {
-    ANDROID_CPU_X86_FEATURE_SSSE3  = (1 << 0),
-    ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1),
-    ANDROID_CPU_X86_FEATURE_MOVBE  = (1 << 2),
-};
-
-// libtheoraplayer addition, renamed this to "Ext" as not to conflict with your own project if you've included cpu-features.c in it
-//extern uint64_t    android_getCpuFeaturesExt(void);
-#define android_getCpuFeaturesExt android_getCpuFeatures
-
-/* Return the number of CPU cores detected on this device. */
-extern int         android_getCpuCount(void);
-
-/* The following is used to force the CPU count and features
- * mask in sandboxed processes. Under 4.1 and higher, these processes
- * cannot access /proc, which is the only way to get information from
- * the kernel about the current hardware (at least on ARM).
- *
- * It _must_ be called only once, and before any android_getCpuXXX
- * function, any other case will fail.
- *
- * This function return 1 on success, and 0 on failure.
- */
-extern int android_setCpu(int      cpu_count,
-                          uint64_t cpu_features);
-
-#ifdef __arm__
-/* Retrieve the ARM 32-bit CPUID value from the kernel.
- * Note that this cannot work on sandboxed processes under 4.1 and
- * higher, unless you called android_setCpuArm() before.
- */
-extern uint32_t android_getCpuIdArm(void);
-
-/* An ARM-specific variant of android_setCpu() that also allows you
- * to set the ARM CPUID field.
- */
-extern int android_setCpuArm(int      cpu_count,
-                             uint64_t cpu_features,
-                             uint32_t cpu_id);
-#endif
-
-__END_DECLS
-
-#endif /* CPU_FEATURES_H */
diff --git a/drivers/theoraplayer/src/YUV/libyuv/LICENSE b/drivers/theoraplayer/src/YUV/libyuv/LICENSE
deleted file mode 100755
index c911747a6b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-Copyright 2011 The LibYuv Project Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-  * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
-  * Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution.
-
-  * Neither the name of Google nor the names of its contributors may
-    be used to endorse or promote products derived from this software
-    without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY b/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY
deleted file mode 100755
index a71591e771..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY
+++ /dev/null
@@ -1,8 +0,0 @@
-This source tree contains third party source code which is governed by third
-party licenses. This file contains references to files which are under other
-licenses than the one provided in the LICENSE file in the root of the source
-tree.
-
-Files governed by third party licenses:
-source/x86inc.asm
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h
deleted file mode 100755
index 3bebe642cc..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_H_  // NOLINT
-#define INCLUDE_LIBYUV_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/compare.h"
-#include "libyuv/convert.h"
-#include "libyuv/convert_argb.h"
-#include "libyuv/convert_from.h"
-#include "libyuv/convert_from_argb.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#include "libyuv/mjpeg_decoder.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/scale.h"
-#include "libyuv/scale_argb.h"
-#include "libyuv/scale_row.h"
-#include "libyuv/version.h"
-#include "libyuv/video_common.h"
-
-#endif  // INCLUDE_LIBYUV_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h
deleted file mode 100755
index beb750ba65..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_  // NOLINT
-#define INCLUDE_LIBYUV_BASIC_TYPES_H_
-
-#include <stddef.h>  // for NULL, size_t
-
-#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
-#include <sys/types.h>  // for uintptr_t on x86
-#else
-#include <stdint.h>  // for uintptr_t
-#endif
-
-#ifndef GG_LONGLONG
-#ifndef INT_TYPES_DEFINED
-#define INT_TYPES_DEFINED
-#ifdef COMPILER_MSVC
-typedef unsigned __int64 uint64;
-typedef __int64 int64;
-#ifndef INT64_C
-#define INT64_C(x) x ## I64
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## UI64
-#endif
-#define INT64_F "I64"
-#else  // COMPILER_MSVC
-#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long uint64;  // NOLINT
-typedef long int64;  // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x ## L
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## UL
-#endif
-#define INT64_F "l"
-#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
-typedef unsigned long long uint64;  // NOLINT
-typedef long long int64;  // NOLINT
-#ifndef INT64_C
-#define INT64_C(x) x ## LL
-#endif
-#ifndef UINT64_C
-#define UINT64_C(x) x ## ULL
-#endif
-#define INT64_F "ll"
-#endif  // __LP64__
-#endif  // COMPILER_MSVC
-typedef unsigned int uint32;
-typedef int int32;
-typedef unsigned short uint16;  // NOLINT
-typedef short int16;  // NOLINT
-typedef unsigned char uint8;
-typedef signed char int8;
-#endif  // INT_TYPES_DEFINED
-#endif  // GG_LONGLONG
-
-// Detect compiler is for x86 or x64.
-#if defined(__x86_64__) || defined(_M_X64) || \
-    defined(__i386__) || defined(_M_IX86)
-#define CPU_X86 1
-#endif
-// Detect compiler is for ARM.
-#if defined(__arm__) || defined(_M_ARM)
-#define CPU_ARM 1
-#endif
-
-#ifndef ALIGNP
-#ifdef __cplusplus
-#define ALIGNP(p, t) \
-    (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
-    ((t) - 1)) & ~((t) - 1))))
-#else
-#define ALIGNP(p, t) \
-    ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1))))  /* NOLINT */
-#endif
-#endif
-
-#if !defined(LIBYUV_API)
-#if defined(_WIN32) || defined(__CYGWIN__)
-#if defined(LIBYUV_BUILDING_SHARED_LIBRARY)
-#define LIBYUV_API __declspec(dllexport)
-#elif defined(LIBYUV_USING_SHARED_LIBRARY)
-#define LIBYUV_API __declspec(dllimport)
-#else
-#define LIBYUV_API
-#endif  // LIBYUV_BUILDING_SHARED_LIBRARY
-#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
-    (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
-    defined(LIBYUV_USING_SHARED_LIBRARY))
-#define LIBYUV_API __attribute__ ((visibility ("default")))
-#else
-#define LIBYUV_API
-#endif  // __GNUC__
-#endif  // LIBYUV_API
-
-#define LIBYUV_BOOL int
-#define LIBYUV_FALSE 0
-#define LIBYUV_TRUE 1
-
-// Visual C x86 or GCC little endian.
-#if defined(__x86_64__) || defined(_M_X64) || \
-  defined(__i386__) || defined(_M_IX86) || \
-  defined(__arm__) || defined(_M_ARM) || \
-  (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#define LIBYUV_LITTLE_ENDIAN
-#endif
-
-#endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h
deleted file mode 100755
index 5dfac7c86a..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_COMPARE_H_  // NOLINT
-#define INCLUDE_LIBYUV_COMPARE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Compute a hash for specified memory. Seed of 5381 recommended.
-LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
-
-// Sum Square Error - used to compute Mean Square Error or PSNR.
-LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a,
-                             const uint8* src_b, int count);
-
-LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
-                                  const uint8* src_b, int stride_b,
-                                  int width, int height);
-
-static const int kMaxPsnr = 128;
-
-LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count);
-
-LIBYUV_API
-double CalcFramePsnr(const uint8* src_a, int stride_a,
-                     const uint8* src_b, int stride_b,
-                     int width, int height);
-
-LIBYUV_API
-double I420Psnr(const uint8* src_y_a, int stride_y_a,
-                const uint8* src_u_a, int stride_u_a,
-                const uint8* src_v_a, int stride_v_a,
-                const uint8* src_y_b, int stride_y_b,
-                const uint8* src_u_b, int stride_u_b,
-                const uint8* src_v_b, int stride_v_b,
-                int width, int height);
-
-LIBYUV_API
-double CalcFrameSsim(const uint8* src_a, int stride_a,
-                     const uint8* src_b, int stride_b,
-                     int width, int height);
-
-LIBYUV_API
-double I420Ssim(const uint8* src_y_a, int stride_y_a,
-                const uint8* src_u_a, int stride_u_a,
-                const uint8* src_v_a, int stride_v_a,
-                const uint8* src_y_b, int stride_y_b,
-                const uint8* src_u_b, int stride_u_b,
-                const uint8* src_v_b, int stride_v_b,
-                int width, int height);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_COMPARE_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h
deleted file mode 100755
index 1bd45c837f..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_H_  // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_H_
-
-#include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes.
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert I444 to I420.
-LIBYUV_API
-int I444ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert I422 to I420.
-LIBYUV_API
-int I422ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert I411 to I420.
-LIBYUV_API
-int I411ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Copy I420 to I420.
-#define I420ToI420 I420Copy
-LIBYUV_API
-int I420Copy(const uint8* src_y, int src_stride_y,
-             const uint8* src_u, int src_stride_u,
-             const uint8* src_v, int src_stride_v,
-             uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int width, int height);
-
-// Convert I400 (grey) to I420.
-LIBYUV_API
-int I400ToI420(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert NV12 to I420.
-LIBYUV_API
-int NV12ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_uv, int src_stride_uv,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert NV21 to I420.
-LIBYUV_API
-int NV21ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_vu, int src_stride_vu,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert YUY2 to I420.
-LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert UYVY to I420.
-LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert M420 to I420.
-LIBYUV_API
-int M420ToI420(const uint8* src_m420, int src_stride_m420,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert Q420 to I420.
-LIBYUV_API
-int Q420ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// ARGB little endian (bgra in memory) to I420.
-LIBYUV_API
-int ARGBToI420(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// BGRA little endian (argb in memory) to I420.
-LIBYUV_API
-int BGRAToI420(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// ABGR little endian (rgba in memory) to I420.
-LIBYUV_API
-int ABGRToI420(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// RGBA little endian (abgr in memory) to I420.
-LIBYUV_API
-int RGBAToI420(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// RGB little endian (bgr in memory) to I420.
-LIBYUV_API
-int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
-                uint8* dst_y, int dst_stride_y,
-                uint8* dst_u, int dst_stride_u,
-                uint8* dst_v, int dst_stride_v,
-                int width, int height);
-
-// RGB big endian (rgb in memory) to I420.
-LIBYUV_API
-int RAWToI420(const uint8* src_frame, int src_stride_frame,
-              uint8* dst_y, int dst_stride_y,
-              uint8* dst_u, int dst_stride_u,
-              uint8* dst_v, int dst_stride_v,
-              int width, int height);
-
-// RGB16 (RGBP fourcc) little endian to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
-                 uint8* dst_y, int dst_stride_y,
-                 uint8* dst_u, int dst_stride_u,
-                 uint8* dst_v, int dst_stride_v,
-                 int width, int height);
-
-// RGB15 (RGBO fourcc) little endian to I420.
-LIBYUV_API
-int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
-                   uint8* dst_y, int dst_stride_y,
-                   uint8* dst_u, int dst_stride_u,
-                   uint8* dst_v, int dst_stride_v,
-                   int width, int height);
-
-// RGB12 (R444 fourcc) little endian to I420.
-LIBYUV_API
-int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
-                   uint8* dst_y, int dst_stride_y,
-                   uint8* dst_u, int dst_stride_u,
-                   uint8* dst_v, int dst_stride_v,
-                   int width, int height);
-
-#ifdef HAVE_JPEG
-// src_width/height provided by capture.
-// dst_width/height for clipping determine final size.
-LIBYUV_API
-int MJPGToI420(const uint8* sample, size_t sample_size,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int src_width, int src_height,
-               int dst_width, int dst_height);
-
-// Query size of MJPG in pixels.
-LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size,
-             int* width, int* height);
-#endif
-
-// Note Bayer formats (BGGR) To I420 are in format_conversion.h
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
-// "dst_stride_y" number of bytes in a row of the dst_y plane.
-//   Normally this would be the same as dst_width, with recommended alignment
-//   to 16 bytes for better efficiency.
-//   If rotation of 90 or 270 is used, stride is affected. The caller should
-//   allocate the I420 buffer according to rotation.
-// "dst_stride_u" number of bytes in a row of the dst_u plane.
-//   Normally this would be the same as (dst_width + 1) / 2, with
-//   recommended alignment to 16 bytes for better efficiency.
-//   If rotation of 90 or 270 is used, stride is affected.
-// "crop_x" and "crop_y" are starting position for cropping.
-//   To center, crop_x = (src_width - dst_width) / 2
-//              crop_y = (src_height - dst_height) / 2
-// "src_width" / "src_height" is size of src_frame in pixels.
-//   "src_height" can be negative indicating a vertically flipped image source.
-// "crop_width" / "crop_height" is the size to crop the src to.
-//    Must be less than or equal to src_width/src_height
-//    Cropping parameters are pre-rotation.
-// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
-// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
-LIBYUV_API
-int ConvertToI420(const uint8* src_frame, size_t src_size,
-                  uint8* dst_y, int dst_stride_y,
-                  uint8* dst_u, int dst_stride_u,
-                  uint8* dst_v, int dst_stride_v,
-                  int crop_x, int crop_y,
-                  int src_width, int src_height,
-                  int crop_width, int crop_height,
-                  enum RotationMode rotation,
-                  uint32 format);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_CONVERT_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h
deleted file mode 100755
index a18014ca2c..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_  // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
-
-#include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-
-// TODO(fbarchard): This set of functions should exactly match convert.h
-// Add missing Q420.
-// TODO(fbarchard): Add tests. Create random content of right size and convert
-// with C vs Opt and or to I420 and compare.
-// TODO(fbarchard): Some of these functions lack parameter setting.
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Alias.
-#define ARGBToARGB ARGBCopy
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int width, int height);
-
-// Convert I420 to ARGB.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert I422 to ARGB.
-LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert I444 to ARGB.
-LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert I411 to ARGB.
-LIBYUV_API
-int I411ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert I400 (grey) to ARGB.
-LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Alias.
-#define YToARGB I400ToARGB_Reference
-
-// Convert I400 to ARGB. Reverse of ARGBToI400.
-LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
-                         uint8* dst_argb, int dst_stride_argb,
-                         int width, int height);
-
-// Convert NV12 to ARGB.
-LIBYUV_API
-int NV12ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_uv, int src_stride_uv,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert NV21 to ARGB.
-LIBYUV_API
-int NV21ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_vu, int src_stride_vu,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert M420 to ARGB.
-LIBYUV_API
-int M420ToARGB(const uint8* src_m420, int src_stride_m420,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// TODO(fbarchard): Convert Q420 to ARGB.
-// LIBYUV_API
-// int Q420ToARGB(const uint8* src_y, int src_stride_y,
-//                const uint8* src_yuy2, int src_stride_yuy2,
-//                uint8* dst_argb, int dst_stride_argb,
-//                int width, int height);
-
-// Convert YUY2 to ARGB.
-LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert UYVY to ARGB.
-LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// BGRA little endian (argb in memory) to ARGB.
-LIBYUV_API
-int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// ABGR little endian (rgba in memory) to ARGB.
-LIBYUV_API
-int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// RGBA little endian (abgr in memory) to ARGB.
-LIBYUV_API
-int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Deprecated function name.
-#define BG24ToARGB RGB24ToARGB
-
-// RGB little endian (bgr in memory) to ARGB.
-LIBYUV_API
-int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
-                uint8* dst_argb, int dst_stride_argb,
-                int width, int height);
-
-// RGB big endian (rgb in memory) to ARGB.
-LIBYUV_API
-int RAWToARGB(const uint8* src_frame, int src_stride_frame,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height);
-
-// RGB16 (RGBP fourcc) little endian to ARGB.
-LIBYUV_API
-int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height);
-
-// RGB15 (RGBO fourcc) little endian to ARGB.
-LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
-                   uint8* dst_argb, int dst_stride_argb,
-                   int width, int height);
-
-// RGB12 (R444 fourcc) little endian to ARGB.
-LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
-                   uint8* dst_argb, int dst_stride_argb,
-                   int width, int height);
-
-#ifdef HAVE_JPEG
-// src_width/height provided by capture
-// dst_width/height for clipping determine final size.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample, size_t sample_size,
-               uint8* dst_argb, int dst_stride_argb,
-               int src_width, int src_height,
-               int dst_width, int dst_height);
-#endif
-
-// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
-
-// Convert camera sample to ARGB with cropping, rotation and vertical flip.
-// "src_size" is needed to parse MJPG.
-// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
-//   Normally this would be the same as dst_width, with recommended alignment
-//   to 16 bytes for better efficiency.
-//   If rotation of 90 or 270 is used, stride is affected. The caller should
-//   allocate the I420 buffer according to rotation.
-// "dst_stride_u" number of bytes in a row of the dst_u plane.
-//   Normally this would be the same as (dst_width + 1) / 2, with
-//   recommended alignment to 16 bytes for better efficiency.
-//   If rotation of 90 or 270 is used, stride is affected.
-// "crop_x" and "crop_y" are starting position for cropping.
-//   To center, crop_x = (src_width - dst_width) / 2
-//              crop_y = (src_height - dst_height) / 2
-// "src_width" / "src_height" is size of src_frame in pixels.
-//   "src_height" can be negative indicating a vertically flipped image source.
-// "crop_width" / "crop_height" is the size to crop the src to.
-//    Must be less than or equal to src_width/src_height
-//    Cropping parameters are pre-rotation.
-// "rotation" can be 0, 90, 180 or 270.
-// "format" is a fourcc. ie 'I420', 'YUY2'
-// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
-LIBYUV_API
-int ConvertToARGB(const uint8* src_frame, size_t src_size,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int crop_x, int crop_y,
-                  int src_width, int src_height,
-                  int crop_width, int crop_height,
-                  enum RotationMode rotation,
-                  uint32 format);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_CONVERT_ARGB_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h
deleted file mode 100755
index b1cf57f7dc..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_  // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_FROM_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/rotate.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// See Also convert.h for conversions from formats to I420.
-
-// I420Copy in convert to I420ToI420.
-
-LIBYUV_API
-int I420ToI422(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-LIBYUV_API
-int I420ToI444(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-LIBYUV_API
-int I420ToI411(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
-LIBYUV_API
-int I400Copy(const uint8* src_y, int src_stride_y,
-             uint8* dst_y, int dst_stride_y,
-             int width, int height);
-
-// TODO(fbarchard): I420ToM420
-// TODO(fbarchard): I420ToQ420
-
-LIBYUV_API
-int I420ToNV12(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_uv, int dst_stride_uv,
-               int width, int height);
-
-LIBYUV_API
-int I420ToNV21(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_vu, int dst_stride_vu,
-               int width, int height);
-
-LIBYUV_API
-int I420ToYUY2(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_frame, int dst_stride_frame,
-               int width, int height);
-
-LIBYUV_API
-int I420ToUYVY(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_frame, int dst_stride_frame,
-               int width, int height);
-
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-LIBYUV_API
-int I420ToRGBA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height);
-
-LIBYUV_API
-int I420ToRGB24(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_frame, int dst_stride_frame,
-                int width, int height);
-
-LIBYUV_API
-int I420ToRAW(const uint8* src_y, int src_stride_y,
-              const uint8* src_u, int src_stride_u,
-              const uint8* src_v, int src_stride_v,
-              uint8* dst_frame, int dst_stride_frame,
-              int width, int height);
-
-LIBYUV_API
-int I420ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_u, int src_stride_u,
-                 const uint8* src_v, int src_stride_v,
-                 uint8* dst_frame, int dst_stride_frame,
-                 int width, int height);
-
-LIBYUV_API
-int I420ToARGB1555(const uint8* src_y, int src_stride_y,
-                   const uint8* src_u, int src_stride_u,
-                   const uint8* src_v, int src_stride_v,
-                   uint8* dst_frame, int dst_stride_frame,
-                   int width, int height);
-
-LIBYUV_API
-int I420ToARGB4444(const uint8* src_y, int src_stride_y,
-                   const uint8* src_u, int src_stride_u,
-                   const uint8* src_v, int src_stride_v,
-                   uint8* dst_frame, int dst_stride_frame,
-                   int width, int height);
-
-// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
-
-// Convert I420 to specified format.
-// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
-//    buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
-LIBYUV_API
-int ConvertFromI420(const uint8* y, int y_stride,
-                    const uint8* u, int u_stride,
-                    const uint8* v, int v_stride,
-                    uint8* dst_sample, int dst_sample_stride,
-                    int width, int height,
-                    uint32 format);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_CONVERT_FROM_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h
deleted file mode 100755
index f0343a77d3..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  // NOLINT
-#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy ARGB to ARGB.
-#define ARGBToARGB ARGBCopy
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int width, int height);
-
-// Convert ARGB To BGRA. (alias)
-#define ARGBToBGRA BGRAToARGB
-LIBYUV_API
-int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert ARGB To ABGR. (alias)
-#define ARGBToABGR ABGRToARGB
-LIBYUV_API
-int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert ARGB To RGBA.
-LIBYUV_API
-int ARGBToRGBA(const uint8* src_frame, int src_stride_frame,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert ARGB To RGB24.
-LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
-                uint8* dst_rgb24, int dst_stride_rgb24,
-                int width, int height);
-
-// Convert ARGB To RAW.
-LIBYUV_API
-int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
-              uint8* dst_rgb, int dst_stride_rgb,
-              int width, int height);
-
-// Convert ARGB To RGB565.
-LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height);
-
-// Convert ARGB To ARGB1555.
-LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
-                   uint8* dst_argb1555, int dst_stride_argb1555,
-                   int width, int height);
-
-// Convert ARGB To ARGB4444.
-LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
-                   uint8* dst_argb4444, int dst_stride_argb4444,
-                   int width, int height);
-
-// Convert ARGB To I444.
-LIBYUV_API
-int ARGBToI444(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert ARGB To I422.
-LIBYUV_API
-int ARGBToI422(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert ARGB To I420. (also in convert.h)
-LIBYUV_API
-int ARGBToI420(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert ARGB to J420. (JPeg full range I420).
-LIBYUV_API
-int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_yj, int dst_stride_yj,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert ARGB To I411.
-LIBYUV_API
-int ARGBToI411(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert ARGB to J400. (JPeg full range).
-LIBYUV_API
-int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_yj, int dst_stride_yj,
-               int width, int height);
-
-// Convert ARGB to I400.
-LIBYUV_API
-int ARGBToI400(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height);
-
-// Convert ARGB To NV12.
-LIBYUV_API
-int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_uv, int dst_stride_uv,
-               int width, int height);
-
-// Convert ARGB To NV21.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_vu, int dst_stride_vu,
-               int width, int height);
-
-// Convert ARGB To NV21.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_vu, int dst_stride_vu,
-               int width, int height);
-
-// Convert ARGB To YUY2.
-LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_yuy2, int dst_stride_yuy2,
-               int width, int height);
-
-// Convert ARGB To UYVY.
-LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_uyvy, int dst_stride_uyvy,
-               int width, int height);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h
deleted file mode 100755
index dc858a814a..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_CPU_ID_H_  // NOLINT
-#define INCLUDE_LIBYUV_CPU_ID_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// TODO(fbarchard): Consider overlapping bits for different architectures.
-// Internal flag to indicate cpuid requires initialization.
-#define kCpuInit 0x1
-
-// These flags are only valid on ARM processors.
-static const int kCpuHasARM = 0x2;
-static const int kCpuHasNEON = 0x4;
-// 0x8 reserved for future ARM flag.
-
-// These flags are only valid on x86 processors.
-static const int kCpuHasX86 = 0x10;
-static const int kCpuHasSSE2 = 0x20;
-static const int kCpuHasSSSE3 = 0x40;
-static const int kCpuHasSSE41 = 0x80;
-static const int kCpuHasSSE42 = 0x100;
-static const int kCpuHasAVX = 0x200;
-static const int kCpuHasAVX2 = 0x400;
-static const int kCpuHasERMS = 0x800;
-static const int kCpuHasFMA3 = 0x1000;
-// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
-
-// These flags are only valid on MIPS processors.
-static const int kCpuHasMIPS = 0x10000;
-static const int kCpuHasMIPS_DSP = 0x20000;
-static const int kCpuHasMIPS_DSPR2 = 0x40000;
-
-// Internal function used to auto-init.
-LIBYUV_API
-int InitCpuFlags(void);
-
-// Internal function for parsing /proc/cpuinfo.
-LIBYUV_API
-int ArmCpuCaps(const char* cpuinfo_name);
-
-// Detect CPU has SSE2 etc.
-// Test_flag parameter should be one of kCpuHas constants above.
-// returns non-zero if instruction set is detected
-static __inline int TestCpuFlag(int test_flag) {
-  LIBYUV_API extern int cpu_info_;
-  return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
-}
-
-// For testing, allow CPU flags to be disabled.
-// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
-// MaskCpuFlags(-1) to enable all cpu specific optimizations.
-// MaskCpuFlags(0) to disable all cpu specific optimizations.
-LIBYUV_API
-void MaskCpuFlags(int enable_flags);
-
-// Low level cpuid for X86. Returns zeros on other CPUs.
-// eax is the info type that you want.
-// ecx is typically the cpu number, and should normally be zero.
-LIBYUV_API
-void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_CPU_ID_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h
deleted file mode 100755
index b18bf05343..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_  // NOLINT
-#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert Bayer RGB formats to I420.
-LIBYUV_API
-int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_y, int dst_stride_y,
-                    uint8* dst_u, int dst_stride_u,
-                    uint8* dst_v, int dst_stride_v,
-                    int width, int height);
-
-LIBYUV_API
-int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_y, int dst_stride_y,
-                    uint8* dst_u, int dst_stride_u,
-                    uint8* dst_v, int dst_stride_v,
-                    int width, int height);
-
-LIBYUV_API
-int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_y, int dst_stride_y,
-                    uint8* dst_u, int dst_stride_u,
-                    uint8* dst_v, int dst_stride_v,
-                    int width, int height);
-
-LIBYUV_API
-int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_y, int dst_stride_y,
-                    uint8* dst_u, int dst_stride_u,
-                    uint8* dst_v, int dst_stride_v,
-                    int width, int height);
-
-// Temporary API mapper.
-#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
-    BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
-
-LIBYUV_API
-int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
-                uint8* dst_y, int dst_stride_y,
-                uint8* dst_u, int dst_stride_u,
-                uint8* dst_v, int dst_stride_v,
-                int width, int height,
-                uint32 src_fourcc_bayer);
-
-// Convert I420 to Bayer RGB formats.
-LIBYUV_API
-int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
-                    const uint8* src_u, int src_stride_u,
-                    const uint8* src_v, int src_stride_v,
-                    uint8* dst_frame, int dst_stride_frame,
-                    int width, int height);
-
-LIBYUV_API
-int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
-                    const uint8* src_u, int src_stride_u,
-                    const uint8* src_v, int src_stride_v,
-                    uint8* dst_frame, int dst_stride_frame,
-                    int width, int height);
-
-LIBYUV_API
-int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
-                    const uint8* src_u, int src_stride_u,
-                    const uint8* src_v, int src_stride_v,
-                    uint8* dst_frame, int dst_stride_frame,
-                    int width, int height);
-
-LIBYUV_API
-int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
-                    const uint8* src_u, int src_stride_u,
-                    const uint8* src_v, int src_stride_v,
-                    uint8* dst_frame, int dst_stride_frame,
-                    int width, int height);
-
-// Temporary API mapper.
-#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
-    I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
-
-LIBYUV_API
-int I420ToBayer(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_frame, int dst_stride_frame,
-                int width, int height,
-                uint32 dst_fourcc_bayer);
-
-// Convert Bayer RGB formats to ARGB.
-LIBYUV_API
-int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height);
-
-LIBYUV_API
-int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height);
-
-LIBYUV_API
-int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height);
-
-LIBYUV_API
-int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height);
-
-// Temporary API mapper.
-#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
-
-LIBYUV_API
-int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
-                uint8* dst_argb, int dst_stride_argb,
-                int width, int height,
-                uint32 src_fourcc_bayer);
-
-// Converts ARGB to Bayer RGB formats.
-LIBYUV_API
-int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_bayer, int dst_stride_bayer,
-                    int width, int height);
-
-LIBYUV_API
-int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_bayer, int dst_stride_bayer,
-                    int width, int height);
-
-LIBYUV_API
-int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_bayer, int dst_stride_bayer,
-                    int width, int height);
-
-LIBYUV_API
-int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_bayer, int dst_stride_bayer,
-                    int width, int height);
-
-// Temporary API mapper.
-#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
-
-LIBYUV_API
-int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
-                uint8* dst_bayer, int dst_stride_bayer,
-                int width, int height,
-                uint32 dst_fourcc_bayer);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_FORMATCONVERSION_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h
deleted file mode 100755
index faffaea8fa..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_  // NOLINT
-#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-// NOTE: For a simplified public API use convert.h MJPGToI420().
-
-struct jpeg_common_struct;
-struct jpeg_decompress_struct;
-struct jpeg_source_mgr;
-
-namespace libyuv {
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-static const uint32 kUnknownDataSize = 0xFFFFFFFF;
-
-enum JpegSubsamplingType {
-  kJpegYuv420,
-  kJpegYuv422,
-  kJpegYuv411,
-  kJpegYuv444,
-  kJpegYuv400,
-  kJpegUnknown
-};
-
-struct SetJmpErrorMgr;
-
-// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
-// simply independent JPEG images with a fixed huffman table (which is omitted).
-// It is rarely used in video transmission, but is common as a camera capture
-// format, especially in Logitech devices. This class implements a decoder for
-// MJPEG frames.
-//
-// See http://tools.ietf.org/html/rfc2435
-class LIBYUV_API MJpegDecoder {
- public:
-  typedef void (*CallbackFunction)(void* opaque,
-                                   const uint8* const* data,
-                                   const int* strides,
-                                   int rows);
-
-  static const int kColorSpaceUnknown;
-  static const int kColorSpaceGrayscale;
-  static const int kColorSpaceRgb;
-  static const int kColorSpaceYCbCr;
-  static const int kColorSpaceCMYK;
-  static const int kColorSpaceYCCK;
-
-  MJpegDecoder();
-  ~MJpegDecoder();
-
-  // Loads a new frame, reads its headers, and determines the uncompressed
-  // image format.
-  // Returns LIBYUV_TRUE if image looks valid and format is supported.
-  // If return value is LIBYUV_TRUE, then the values for all the following
-  // getters are populated.
-  // src_len is the size of the compressed mjpeg frame in bytes.
-  LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
-
-  // Returns width of the last loaded frame in pixels.
-  int GetWidth();
-
-  // Returns height of the last loaded frame in pixels.
-  int GetHeight();
-
-  // Returns format of the last loaded frame. The return value is one of the
-  // kColorSpace* constants.
-  int GetColorSpace();
-
-  // Number of color components in the color space.
-  int GetNumComponents();
-
-  // Sample factors of the n-th component.
-  int GetHorizSampFactor(int component);
-
-  int GetVertSampFactor(int component);
-
-  int GetHorizSubSampFactor(int component);
-
-  int GetVertSubSampFactor(int component);
-
-  // Public for testability.
-  int GetImageScanlinesPerImcuRow();
-
-  // Public for testability.
-  int GetComponentScanlinesPerImcuRow(int component);
-
-  // Width of a component in bytes.
-  int GetComponentWidth(int component);
-
-  // Height of a component.
-  int GetComponentHeight(int component);
-
-  // Width of a component in bytes with padding for DCTSIZE. Public for testing.
-  int GetComponentStride(int component);
-
-  // Size of a component in bytes.
-  int GetComponentSize(int component);
-
-  // Call this after LoadFrame() if you decide you don't want to decode it
-  // after all.
-  LIBYUV_BOOL UnloadFrame();
-
-  // Decodes the entire image into a one-buffer-per-color-component format.
-  // dst_width must match exactly. dst_height must be <= to image height; if
-  // less, the image is cropped. "planes" must have size equal to at least
-  // GetNumComponents() and they must point to non-overlapping buffers of size
-  // at least GetComponentSize(i). The pointers in planes are incremented
-  // to point to after the end of the written data.
-  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
-  LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
-
-  // Decodes the entire image and passes the data via repeated calls to a
-  // callback function. Each call will get the data for a whole number of
-  // image scanlines.
-  // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
-  LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
-                        int dst_width, int dst_height);
-
-  // The helper function which recognizes the jpeg sub-sampling type.
-  static JpegSubsamplingType JpegSubsamplingTypeHelper(
-     int* subsample_x, int* subsample_y, int number_of_components);
-
- private:
-  struct Buffer {
-    const uint8* data;
-    int len;
-  };
-
-  struct BufferVector {
-    Buffer* buffers;
-    int len;
-    int pos;
-  };
-
-  // Methods that are passed to jpeglib.
-  static int fill_input_buffer(jpeg_decompress_struct* cinfo);
-  static void init_source(jpeg_decompress_struct* cinfo);
-  static void skip_input_data(jpeg_decompress_struct* cinfo,
-                              long num_bytes);  // NOLINT
-  static void term_source(jpeg_decompress_struct* cinfo);
-
-  static void ErrorHandler(jpeg_common_struct* cinfo);
-
-  void AllocOutputBuffers(int num_outbufs);
-  void DestroyOutputBuffers();
-
-  LIBYUV_BOOL StartDecode();
-  LIBYUV_BOOL FinishDecode();
-
-  void SetScanlinePointers(uint8** data);
-  LIBYUV_BOOL DecodeImcuRow();
-
-  int GetComponentScanlinePadding(int component);
-
-  // A buffer holding the input data for a frame.
-  Buffer buf_;
-  BufferVector buf_vec_;
-
-  jpeg_decompress_struct* decompress_struct_;
-  jpeg_source_mgr* source_mgr_;
-  SetJmpErrorMgr* error_mgr_;
-
-  // LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
-  // GetComponentScanlinePadding() != 0.)
-  LIBYUV_BOOL has_scanline_padding_;
-
-  // Temporaries used to point to scanline outputs.
-  int num_outbufs_;  // Outermost size of all arrays below.
-  uint8*** scanlines_;
-  int* scanlines_sizes_;
-  // Temporary buffer used for decoding when we can't decode directly to the
-  // output buffers. Large enough for just one iMCU row.
-  uint8** databuf_;
-  int* databuf_strides_;
-};
-
-}  // namespace libyuv
-
-#endif  //  __cplusplus
-#endif  // INCLUDE_LIBYUV_MJPEG_DECODER_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h
deleted file mode 100755
index ac516c5ba5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_  // NOLINT
-#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
-
-#include "libyuv/basic_types.h"
-
-// TODO(fbarchard): Remove the following headers includes.
-#include "libyuv/convert.h"
-#include "libyuv/convert_argb.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy a plane of data.
-LIBYUV_API
-void CopyPlane(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height);
-
-// Set a plane of data to a 32 bit value.
-LIBYUV_API
-void SetPlane(uint8* dst_y, int dst_stride_y,
-              int width, int height,
-              uint32 value);
-
-// Copy I400.  Supports inverting.
-LIBYUV_API
-int I400ToI400(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height);
-
-
-// Copy I422 to I422.
-#define I422ToI422 I422Copy
-LIBYUV_API
-int I422Copy(const uint8* src_y, int src_stride_y,
-             const uint8* src_u, int src_stride_u,
-             const uint8* src_v, int src_stride_v,
-             uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int width, int height);
-
-// Copy I444 to I444.
-#define I444ToI444 I444Copy
-LIBYUV_API
-int I444Copy(const uint8* src_y, int src_stride_y,
-             const uint8* src_u, int src_stride_u,
-             const uint8* src_v, int src_stride_v,
-             uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int width, int height);
-
-// Convert YUY2 to I422.
-LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert UYVY to I422.
-LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Convert I420 to I400. (calls CopyPlane ignoring u/v).
-LIBYUV_API
-int I420ToI400(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height);
-
-// Alias
-#define I420ToI420Mirror I420Mirror
-
-// I420 mirror.
-LIBYUV_API
-int I420Mirror(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
-// Alias
-#define I400ToI400Mirror I400Mirror
-
-// I400 mirror.  A single plane is mirrored horizontally.
-// Pass negative height to achieve 180 degree rotation.
-LIBYUV_API
-int I400Mirror(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height);
-
-// Alias
-#define ARGBToARGBMirror ARGBMirror
-
-// ARGB mirror.
-LIBYUV_API
-int ARGBMirror(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert NV12 to RGB565.
-LIBYUV_API
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height);
-
-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height);
-
-// I422ToARGB is in convert_argb.h
-// Convert I422 to BGRA.
-LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_bgra, int dst_stride_bgra,
-               int width, int height);
-
-// Convert I422 to ABGR.
-LIBYUV_API
-int I422ToABGR(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_abgr, int dst_stride_abgr,
-               int width, int height);
-
-// Convert I422 to RGBA.
-LIBYUV_API
-int I422ToRGBA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height);
-
-// Draw a rectangle into I420.
-LIBYUV_API
-int I420Rect(uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int x, int y, int width, int height,
-             int value_y, int value_u, int value_v);
-
-// Draw a rectangle into ARGB.
-LIBYUV_API
-int ARGBRect(uint8* dst_argb, int dst_stride_argb,
-             int x, int y, int width, int height, uint32 value);
-
-// Convert ARGB to gray scale ARGB.
-LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Make a rectangle of ARGB gray scale.
-LIBYUV_API
-int ARGBGray(uint8* dst_argb, int dst_stride_argb,
-             int x, int y, int width, int height);
-
-// Make a rectangle of ARGB Sepia tone.
-LIBYUV_API
-int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
-              int x, int y, int width, int height);
-
-// Apply a matrix rotation to each ARGB pixel.
-// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
-// The first 4 coefficients apply to B, G, R, A and produce B of the output.
-// The next 4 coefficients apply to B, G, R, A and produce G of the output.
-// The next 4 coefficients apply to B, G, R, A and produce R of the output.
-// The last 4 coefficients apply to B, G, R, A and produce A of the output.
-LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_argb, int dst_stride_argb,
-                    const int8* matrix_argb,
-                    int width, int height);
-
-// Deprecated. Use ARGBColorMatrix instead.
-// Apply a matrix rotation to each ARGB pixel.
-// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
-// The first 4 coefficients apply to B, G, R, A and produce B of the output.
-// The next 4 coefficients apply to B, G, R, A and produce G of the output.
-// The last 4 coefficients apply to B, G, R, A and produce R of the output.
-LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
-                   const int8* matrix_rgb,
-                   int x, int y, int width, int height);
-
-// Apply a color table each ARGB pixel.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
-                   const uint8* table_argb,
-                   int x, int y, int width, int height);
-
-// Apply a color table each ARGB pixel but preserve destination alpha.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
-                  const uint8* table_argb,
-                  int x, int y, int width, int height);
-
-// Apply a luma/color table each ARGB pixel but preserve destination alpha.
-// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from
-// RGB (YJ style) and C is an 8 bit color component (R, G or B).
-LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
-                       uint8* dst_argb, int dst_stride_argb,
-                       const uint8* luma_rgb_table,
-                       int width, int height);
-
-// Apply a 3 term polynomial to ARGB values.
-// poly points to a 4x4 matrix.  The first row is constants.  The 2nd row is
-// coefficients for b, g, r and a.  The 3rd row is coefficients for b squared,
-// g squared, r squared and a squared.  The 4rd row is coefficients for b to
-// the 3, g to the 3, r to the 3 and a to the 3.  The values are summed and
-// result clamped to 0 to 255.
-// A polynomial approximation can be dirived using software such as 'R'.
-
-LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
-                   uint8* dst_argb, int dst_stride_argb,
-                   const float* poly,
-                   int width, int height);
-
-// Quantize a rectangle of ARGB. Alpha unaffected.
-// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
-// interval_size should be a value between 1 and 255.
-// interval_offset should be a value between 0 and 255.
-LIBYUV_API
-int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
-                 int scale, int interval_size, int interval_offset,
-                 int x, int y, int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int width, int height);
-
-// Copy ARGB to ARGB.
-LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
-                     uint8* dst_argb, int dst_stride_argb,
-                     int width, int height);
-
-typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
-                             uint8* dst_argb, int width);
-
-// Get function to Alpha Blend ARGB pixels and store to destination.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend();
-
-// Alpha Blend ARGB images and store to destination.
-// Alpha of destination is set to 255.
-LIBYUV_API
-int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
-              const uint8* src_argb1, int src_stride_argb1,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height);
-
-// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
-LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
-                 const uint8* src_argb1, int src_stride_argb1,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height);
-
-// Add ARGB image with ARGB image. Saturates to 255.
-LIBYUV_API
-int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
-            const uint8* src_argb1, int src_stride_argb1,
-            uint8* dst_argb, int dst_stride_argb,
-            int width, int height);
-
-// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
-LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
-                 const uint8* src_argb1, int src_stride_argb1,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height);
-
-// Convert I422 to YUY2.
-LIBYUV_API
-int I422ToYUY2(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_frame, int dst_stride_frame,
-               int width, int height);
-
-// Convert I422 to UYVY.
-LIBYUV_API
-int I422ToUYVY(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_frame, int dst_stride_frame,
-               int width, int height);
-
-// Convert unattentuated ARGB to preattenuated ARGB.
-LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int width, int height);
-
-// Convert preattentuated ARGB to unattenuated ARGB.
-LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height);
-
-// Convert MJPG to ARGB.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample, size_t sample_size,
-               uint8* argb, int argb_stride,
-               int w, int h, int dw, int dh);
-
-// Internal function - do not call directly.
-// Computes table of cumulative sum for image where the value is the sum
-// of all values above and to the left of the entry. Used by ARGBBlur.
-LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
-                             int32* dst_cumsum, int dst_stride32_cumsum,
-                             int width, int height);
-
-// Blur ARGB image.
-// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
-//   16 byte boundary.
-// dst_stride32_cumsum is number of ints in a row (width * 4).
-// radius is number of pixels around the center.  e.g. 1 = 3x3. 2=5x5.
-// Blur is optimized for radius of 5 (11x11) or less.
-LIBYUV_API
-int ARGBBlur(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int32* dst_cumsum, int dst_stride32_cumsum,
-             int width, int height, int radius);
-
-// Multiply ARGB image by ARGB value.
-LIBYUV_API
-int ARGBShade(const uint8* src_argb, int src_stride_argb,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height, uint32 value);
-
-// Interpolate between two ARGB images using specified amount of interpolation
-// (0 to 255) and store to destination.
-// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
-// and 255 means 1% src_argb0 and 99% src_argb1.
-// Internally uses ARGBScale bilinear filtering.
-// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
-LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
-                    const uint8* src_argb1, int src_stride_argb1,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height, int interpolation);
-
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
-    defined(TARGET_IPHONE_SIMULATOR)
-#define LIBYUV_DISABLE_X86
-#endif
-
-// Row functions for copying a pixels from a source with a slope to a row
-// of destination. Useful for scaling, rotation, mirror, texture mapping.
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
-                     uint8* dst_argb, const float* uv_dudv, int width);
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
-                        uint8* dst_argb, const float* uv_dudv, int width);
-#define HAS_ARGBAFFINEROW_SSE2
-#endif  // LIBYUV_DISABLE_X86
-
-// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
-// shuffler is 16 bytes and must be aligned.
-LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
-                uint8* dst_argb, int dst_stride_argb,
-                const uint8* shuffler, int width, int height);
-
-// Sobel ARGB effect with planar output.
-LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
-                     uint8* dst_y, int dst_stride_y,
-                     int width, int height);
-
-// Sobel ARGB effect.
-LIBYUV_API
-int ARGBSobel(const uint8* src_argb, int src_stride_argb,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height);
-
-// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
-LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
-                uint8* dst_argb, int dst_stride_argb,
-                int width, int height);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h
deleted file mode 100755
index 8af60b8955..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_H_  // NOLINT
-#define INCLUDE_LIBYUV_ROTATE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Supported rotation.
-typedef enum RotationMode {
-  kRotate0 = 0,  // No rotation.
-  kRotate90 = 90,  // Rotate 90 degrees clockwise.
-  kRotate180 = 180,  // Rotate 180 degrees.
-  kRotate270 = 270,  // Rotate 270 degrees clockwise.
-
-  // Deprecated.
-  kRotateNone = 0,
-  kRotateClockwise = 90,
-  kRotateCounterClockwise = 270,
-} RotationModeEnum;
-
-// Rotate I420 frame.
-LIBYUV_API
-int I420Rotate(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int src_width, int src_height, enum RotationMode mode);
-
-// Rotate NV12 input and store in I420.
-LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
-                     const uint8* src_uv, int src_stride_uv,
-                     uint8* dst_y, int dst_stride_y,
-                     uint8* dst_u, int dst_stride_u,
-                     uint8* dst_v, int dst_stride_v,
-                     int src_width, int src_height, enum RotationMode mode);
-
-// Rotate a plane by 0, 90, 180, or 270.
-LIBYUV_API
-int RotatePlane(const uint8* src, int src_stride,
-                uint8* dst, int dst_stride,
-                int src_width, int src_height, enum RotationMode mode);
-
-// Rotate planes by 90, 180, 270. Deprecated.
-LIBYUV_API
-void RotatePlane90(const uint8* src, int src_stride,
-                   uint8* dst, int dst_stride,
-                   int width, int height);
-
-LIBYUV_API
-void RotatePlane180(const uint8* src, int src_stride,
-                    uint8* dst, int dst_stride,
-                    int width, int height);
-
-LIBYUV_API
-void RotatePlane270(const uint8* src, int src_stride,
-                    uint8* dst, int dst_stride,
-                    int width, int height);
-
-LIBYUV_API
-void RotateUV90(const uint8* src, int src_stride,
-                uint8* dst_a, int dst_stride_a,
-                uint8* dst_b, int dst_stride_b,
-                int width, int height);
-
-// Rotations for when U and V are interleaved.
-// These functions take one input pointer and
-// split the data into two buffers while
-// rotating them. Deprecated.
-LIBYUV_API
-void RotateUV180(const uint8* src, int src_stride,
-                 uint8* dst_a, int dst_stride_a,
-                 uint8* dst_b, int dst_stride_b,
-                 int width, int height);
-
-LIBYUV_API
-void RotateUV270(const uint8* src, int src_stride,
-                 uint8* dst_a, int dst_stride_a,
-                 uint8* dst_b, int dst_stride_b,
-                 int width, int height);
-
-// The 90 and 270 functions are based on transposes.
-// Doing a transpose with reversing the read/write
-// order will result in a rotation by +- 90 degrees.
-// Deprecated.
-LIBYUV_API
-void TransposePlane(const uint8* src, int src_stride,
-                    uint8* dst, int dst_stride,
-                    int width, int height);
-
-LIBYUV_API
-void TransposeUV(const uint8* src, int src_stride,
-                 uint8* dst_a, int dst_stride_a,
-                 uint8* dst_b, int dst_stride_b,
-                 int width, int height);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_ROTATE_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h
deleted file mode 100755
index 660ff5573e..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_  // NOLINT
-#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/rotate.h"  // For RotationMode.
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Rotate ARGB frame
-LIBYUV_API
-int ARGBRotate(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int src_width, int src_height, enum RotationMode mode);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_ROTATE_ARGB_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h
deleted file mode 100755
index 757020da86..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h
+++ /dev/null
@@ -1,1694 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
-#define INCLUDE_LIBYUV_ROW_H_
-
-#include <stdlib.h>  // For malloc.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
-
-#ifdef __cplusplus
-#define align_buffer_64(var, size)                                             \
-  uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
-  uint8* var = reinterpret_cast<uint8*>                                        \
-      ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
-#else
-#define align_buffer_64(var, size)                                             \
-  uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
-  uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63)       /* NOLINT */
-#endif
-
-#define free_aligned_buffer_64(var) \
-  free(var##_mem);  \
-  var = 0
-
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
-    defined(TARGET_IPHONE_SIMULATOR)
-#define LIBYUV_DISABLE_X86
-#endif
-// True if compiling for SSSE3 as a requirement.
-#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
-#define LIBYUV_SSSE3_ONLY
-#endif
-
-// Enable for NaCL pepper 33 for bundle and AVX2 support.
-//  #define NEW_BINUTILS
-
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-// Effects:
-#define HAS_ARGBADDROW_SSE2
-#define HAS_ARGBAFFINEROW_SSE2
-#define HAS_ARGBATTENUATEROW_SSSE3
-#define HAS_ARGBBLENDROW_SSSE3
-#define HAS_ARGBCOLORMATRIXROW_SSSE3
-#define HAS_ARGBCOLORTABLEROW_X86
-#define HAS_ARGBCOPYALPHAROW_SSE2
-#define HAS_ARGBCOPYYTOALPHAROW_SSE2
-#define HAS_ARGBGRAYROW_SSSE3
-#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
-#define HAS_ARGBMIRRORROW_SSSE3
-#define HAS_ARGBMULTIPLYROW_SSE2
-#define HAS_ARGBPOLYNOMIALROW_SSE2
-#define HAS_ARGBQUANTIZEROW_SSE2
-#define HAS_ARGBSEPIAROW_SSSE3
-#define HAS_ARGBSHADEROW_SSE2
-#define HAS_ARGBSUBTRACTROW_SSE2
-#define HAS_ARGBTOUVROW_SSSE3
-#define HAS_ARGBUNATTENUATEROW_SSE2
-#define HAS_COMPUTECUMULATIVESUMROW_SSE2
-#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-#define HAS_INTERPOLATEROW_SSE2
-#define HAS_INTERPOLATEROW_SSSE3
-#define HAS_RGBCOLORTABLEROW_X86
-#define HAS_SOBELROW_SSE2
-#define HAS_SOBELTOPLANEROW_SSE2
-#define HAS_SOBELXROW_SSE2
-#define HAS_SOBELXYROW_SSE2
-#define HAS_SOBELYROW_SSE2
-
-// Conversions:
-#define HAS_ABGRTOUVROW_SSSE3
-#define HAS_ABGRTOYROW_SSSE3
-#define HAS_ARGB1555TOARGBROW_SSE2
-#define HAS_ARGB4444TOARGBROW_SSE2
-#define HAS_ARGBSHUFFLEROW_SSE2
-#define HAS_ARGBSHUFFLEROW_SSSE3
-#define HAS_ARGBTOARGB1555ROW_SSE2
-#define HAS_ARGBTOARGB4444ROW_SSE2
-#define HAS_ARGBTOBAYERGGROW_SSE2
-#define HAS_ARGBTOBAYERROW_SSSE3
-#define HAS_ARGBTORAWROW_SSSE3
-#define HAS_ARGBTORGB24ROW_SSSE3
-#define HAS_ARGBTORGB565ROW_SSE2
-#define HAS_ARGBTOUV422ROW_SSSE3
-#define HAS_ARGBTOUV444ROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
-#define HAS_ARGBTOYJROW_SSSE3
-#define HAS_ARGBTOYROW_SSSE3
-#define HAS_BGRATOUVROW_SSSE3
-#define HAS_BGRATOYROW_SSSE3
-#define HAS_COPYROW_ERMS
-#define HAS_COPYROW_SSE2
-#define HAS_COPYROW_X86
-#define HAS_HALFROW_SSE2
-#define HAS_I400TOARGBROW_SSE2
-#define HAS_I411TOARGBROW_SSSE3
-#define HAS_I422TOARGB1555ROW_SSSE3
-#define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGB1555ROW_SSSE3
-#define HAS_I422TOARGB4444ROW_SSSE3
-#define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOBGRAROW_SSSE3
-#define HAS_I422TORAWROW_SSSE3
-#define HAS_I422TORGB24ROW_SSSE3
-#define HAS_I422TORGB565ROW_SSSE3
-#define HAS_I422TORGBAROW_SSSE3
-#define HAS_I422TOUYVYROW_SSE2
-#define HAS_I422TOYUY2ROW_SSE2
-#define HAS_I444TOARGBROW_SSSE3
-#define HAS_MERGEUVROW_SSE2
-#define HAS_MIRRORROW_SSE2
-#define HAS_MIRRORROW_SSSE3
-#define HAS_MIRRORROW_UV_SSSE3
-#define HAS_MIRRORUVROW_SSSE3
-#define HAS_NV12TOARGBROW_SSSE3
-#define HAS_NV12TORGB565ROW_SSSE3
-#define HAS_NV21TOARGBROW_SSSE3
-#define HAS_NV21TORGB565ROW_SSSE3
-#define HAS_RAWTOARGBROW_SSSE3
-#define HAS_RAWTOYROW_SSSE3
-#define HAS_RGB24TOARGBROW_SSSE3
-#define HAS_RGB24TOYROW_SSSE3
-#define HAS_RGB565TOARGBROW_SSE2
-#define HAS_RGBATOUVROW_SSSE3
-#define HAS_RGBATOYROW_SSSE3
-#define HAS_SETROW_X86
-#define HAS_SPLITUVROW_SSE2
-#define HAS_UYVYTOARGBROW_SSSE3
-#define HAS_UYVYTOUV422ROW_SSE2
-#define HAS_UYVYTOUVROW_SSE2
-#define HAS_UYVYTOYROW_SSE2
-#define HAS_YTOARGBROW_SSE2
-#define HAS_YUY2TOARGBROW_SSSE3
-#define HAS_YUY2TOUV422ROW_SSE2
-#define HAS_YUY2TOUVROW_SSE2
-#define HAS_YUY2TOYROW_SSE2
-#endif
-
-// GCC >= 4.7.0 required for AVX2.
-#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
-#define GCC_HAS_AVX2 1
-#endif  // GNUC >= 4.7
-#endif  // __GNUC__
-
-// clang >= 3.4.0 required for AVX2.
-#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
-#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
-#define CLANG_HAS_AVX2 1
-#endif  // clang >= 3.4
-#endif  // __clang__
-
-// Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700
-#define VISUALC_HAS_AVX2 1
-#endif  // VisualStudio >= 2012
-
-// The following are available on all x86 platforms, but
-// require VS2012, clang 3.4 or gcc 4.7.
-// The code supports NaCL but requires a new compiler and validator.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
-    defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
-// Effects:
-#define HAS_ARGBPOLYNOMIALROW_AVX2
-#define HAS_ARGBSHUFFLEROW_AVX2
-#define HAS_ARGBCOPYALPHAROW_AVX2
-#define HAS_ARGBCOPYYTOALPHAROW_AVX2
-#endif
-
-// The following are require VS2012.
-// TODO(fbarchard): Port to gcc.
-#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
-#define HAS_ARGBTOUVROW_AVX2
-#define HAS_ARGBTOYJROW_AVX2
-#define HAS_ARGBTOYROW_AVX2
-#define HAS_HALFROW_AVX2
-#define HAS_I422TOARGBROW_AVX2
-#define HAS_INTERPOLATEROW_AVX2
-#define HAS_MERGEUVROW_AVX2
-#define HAS_MIRRORROW_AVX2
-#define HAS_SPLITUVROW_AVX2
-#define HAS_UYVYTOUV422ROW_AVX2
-#define HAS_UYVYTOUVROW_AVX2
-#define HAS_UYVYTOYROW_AVX2
-#define HAS_YUY2TOUV422ROW_AVX2
-#define HAS_YUY2TOUVROW_AVX2
-#define HAS_YUY2TOYROW_AVX2
-
-// Effects:
-#define HAS_ARGBADDROW_AVX2
-#define HAS_ARGBATTENUATEROW_AVX2
-#define HAS_ARGBMIRRORROW_AVX2
-#define HAS_ARGBMULTIPLYROW_AVX2
-#define HAS_ARGBSUBTRACTROW_AVX2
-#define HAS_ARGBUNATTENUATEROW_AVX2
-#endif  // defined(VISUALC_HAS_AVX2)
-
-// The following are Yasm x86 only:
-// TODO(fbarchard): Port AVX2 to inline.
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
-    (defined(_M_IX86) || defined(_M_X64) || \
-    defined(__x86_64__) || defined(__i386__))
-#define HAS_MERGEUVROW_AVX2
-#define HAS_MERGEUVROW_MMX
-#define HAS_SPLITUVROW_AVX2
-#define HAS_SPLITUVROW_MMX
-#define HAS_UYVYTOYROW_AVX2
-#define HAS_UYVYTOYROW_MMX
-#define HAS_YUY2TOYROW_AVX2
-#define HAS_YUY2TOYROW_MMX
-#endif
-
-// The following are disabled when SSSE3 is available:
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
-    !defined(LIBYUV_SSSE3_ONLY)
-#define HAS_ARGBBLENDROW_SSE2
-#define HAS_ARGBATTENUATEROW_SSE2
-#define HAS_MIRRORROW_SSE2
-#endif
-
-// The following are available on Neon platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_ABGRTOUVROW_NEON
-#define HAS_ABGRTOYROW_NEON
-#define HAS_ARGB1555TOARGBROW_NEON
-#define HAS_ARGB1555TOUVROW_NEON
-#define HAS_ARGB1555TOYROW_NEON
-#define HAS_ARGB4444TOARGBROW_NEON
-#define HAS_ARGB4444TOUVROW_NEON
-#define HAS_ARGB4444TOYROW_NEON
-#define HAS_ARGBTOARGB1555ROW_NEON
-#define HAS_ARGBTOARGB4444ROW_NEON
-#define HAS_ARGBTOBAYERROW_NEON
-#define HAS_ARGBTOBAYERGGROW_NEON
-#define HAS_ARGBTORAWROW_NEON
-#define HAS_ARGBTORGB24ROW_NEON
-#define HAS_ARGBTORGB565ROW_NEON
-#define HAS_ARGBTOUV411ROW_NEON
-#define HAS_ARGBTOUV422ROW_NEON
-#define HAS_ARGBTOUV444ROW_NEON
-#define HAS_ARGBTOUVROW_NEON
-#define HAS_ARGBTOUVJROW_NEON
-#define HAS_ARGBTOYROW_NEON
-#define HAS_ARGBTOYJROW_NEON
-#define HAS_BGRATOUVROW_NEON
-#define HAS_BGRATOYROW_NEON
-#define HAS_COPYROW_NEON
-#define HAS_HALFROW_NEON
-#define HAS_I400TOARGBROW_NEON
-#define HAS_I411TOARGBROW_NEON
-#define HAS_I422TOABGRROW_NEON
-#define HAS_I422TOARGB1555ROW_NEON
-#define HAS_I422TOARGB4444ROW_NEON
-#define HAS_I422TOARGBROW_NEON
-#define HAS_I422TOBGRAROW_NEON
-#define HAS_I422TORAWROW_NEON
-#define HAS_I422TORGB24ROW_NEON
-#define HAS_I422TORGB565ROW_NEON
-#define HAS_I422TORGBAROW_NEON
-#define HAS_I422TOUYVYROW_NEON
-#define HAS_I422TOYUY2ROW_NEON
-#define HAS_I444TOARGBROW_NEON
-#define HAS_MERGEUVROW_NEON
-#define HAS_MIRRORROW_NEON
-#define HAS_MIRRORUVROW_NEON
-#define HAS_NV12TOARGBROW_NEON
-#define HAS_NV12TORGB565ROW_NEON
-#define HAS_NV21TOARGBROW_NEON
-#define HAS_NV21TORGB565ROW_NEON
-#define HAS_RAWTOARGBROW_NEON
-#define HAS_RAWTOUVROW_NEON
-#define HAS_RAWTOYROW_NEON
-#define HAS_RGB24TOARGBROW_NEON
-#define HAS_RGB24TOUVROW_NEON
-#define HAS_RGB24TOYROW_NEON
-#define HAS_RGB565TOARGBROW_NEON
-#define HAS_RGB565TOUVROW_NEON
-#define HAS_RGB565TOYROW_NEON
-#define HAS_RGBATOUVROW_NEON
-#define HAS_RGBATOYROW_NEON
-#define HAS_SETROW_NEON
-#define HAS_SPLITUVROW_NEON
-#define HAS_UYVYTOARGBROW_NEON
-#define HAS_UYVYTOUV422ROW_NEON
-#define HAS_UYVYTOUVROW_NEON
-#define HAS_UYVYTOYROW_NEON
-#define HAS_YTOARGBROW_NEON
-#define HAS_YUY2TOARGBROW_NEON
-#define HAS_YUY2TOUV422ROW_NEON
-#define HAS_YUY2TOUVROW_NEON
-#define HAS_YUY2TOYROW_NEON
-
-// Effects:
-#define HAS_ARGBADDROW_NEON
-#define HAS_ARGBATTENUATEROW_NEON
-#define HAS_ARGBBLENDROW_NEON
-#define HAS_ARGBCOLORMATRIXROW_NEON
-#define HAS_ARGBGRAYROW_NEON
-#define HAS_ARGBMIRRORROW_NEON
-#define HAS_ARGBMULTIPLYROW_NEON
-#define HAS_ARGBQUANTIZEROW_NEON
-#define HAS_ARGBSEPIAROW_NEON
-#define HAS_ARGBSHADEROW_NEON
-#define HAS_ARGBSUBTRACTROW_NEON
-#define HAS_SOBELROW_NEON
-#define HAS_SOBELTOPLANEROW_NEON
-#define HAS_SOBELXYROW_NEON
-#define HAS_SOBELXROW_NEON
-#define HAS_SOBELYROW_NEON
-#define HAS_INTERPOLATEROW_NEON
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
-#define HAS_COPYROW_MIPS
-#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_I422TOABGRROW_MIPS_DSPR2
-#define HAS_I422TOARGBROW_MIPS_DSPR2
-#define HAS_I422TOBGRAROW_MIPS_DSPR2
-#define HAS_INTERPOLATEROWS_MIPS_DSPR2
-#define HAS_MIRRORROW_MIPS_DSPR2
-#define HAS_MIRRORUVROW_MIPS_DSPR2
-#define HAS_SPLITUVROW_MIPS_DSPR2
-#endif
-#endif
-
-#if defined(_MSC_VER) && !defined(__CLR_VER)
-#define SIMD_ALIGNED(var) __declspec(align(16)) var
-typedef __declspec(align(16)) int16 vec16[8];
-typedef __declspec(align(16)) int32 vec32[4];
-typedef __declspec(align(16)) int8 vec8[16];
-typedef __declspec(align(16)) uint16 uvec16[8];
-typedef __declspec(align(16)) uint32 uvec32[4];
-typedef __declspec(align(16)) uint8 uvec8[16];
-typedef __declspec(align(32)) int16 lvec16[16];
-typedef __declspec(align(32)) int32 lvec32[8];
-typedef __declspec(align(32)) int8 lvec8[32];
-typedef __declspec(align(32)) uint16 ulvec16[16];
-typedef __declspec(align(32)) uint32 ulvec32[8];
-typedef __declspec(align(32)) uint8 ulvec8[32];
-
-#elif defined(__GNUC__)
-// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
-#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
-typedef int16 __attribute__((vector_size(16))) vec16;
-typedef int32 __attribute__((vector_size(16))) vec32;
-typedef int8 __attribute__((vector_size(16))) vec8;
-typedef uint16 __attribute__((vector_size(16))) uvec16;
-typedef uint32 __attribute__((vector_size(16))) uvec32;
-typedef uint8 __attribute__((vector_size(16))) uvec8;
-#else
-#define SIMD_ALIGNED(var) var
-typedef int16 vec16[8];
-typedef int32 vec32[4];
-typedef int8 vec8[16];
-typedef uint16 uvec16[8];
-typedef uint32 uvec32[4];
-typedef uint8 uvec8[16];
-#endif
-
-#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
-#define OMITFP
-#else
-#define OMITFP __attribute__((optimize("omit-frame-pointer")))
-#endif
-
-// NaCL macros for GCC x86 and x64.
-
-// TODO(nfullagar): When pepper_33 toolchain is distributed, default to
-// NEW_BINUTILS and remove all BUNDLEALIGN occurances.
-#if defined(__native_client__)
-#define LABELALIGN ".p2align 5\n"
-#else
-#define LABELALIGN ".p2align 2\n"
-#endif
-#if defined(__native_client__) && defined(__x86_64__)
-#if defined(NEW_BINUTILS)
-#define BUNDLELOCK ".bundle_lock\n"
-#define BUNDLEUNLOCK ".bundle_unlock\n"
-#define BUNDLEALIGN "\n"
-#else
-#define BUNDLELOCK "\n"
-#define BUNDLEUNLOCK "\n"
-#define BUNDLEALIGN ".p2align 5\n"
-#endif
-#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
-#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
-#define MEMLEA(offset, base) #offset "(%q" #base ")"
-#define MEMLEA3(offset, index, scale) \
-    #offset "(,%q" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
-    #offset "(%q" #base ",%q" #index "," #scale ")"
-#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
-#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
-    BUNDLELOCK \
-    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
-    #opcode " (%%r15,%%r14),%%" #reg "\n" \
-    BUNDLEUNLOCK
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
-    BUNDLELOCK \
-    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
-    #opcode " %%" #reg ",(%%r15,%%r14)\n" \
-    BUNDLEUNLOCK
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
-    BUNDLELOCK \
-    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
-    #opcode " (%%r15,%%r14),%" #arg "\n" \
-    BUNDLEUNLOCK
-#else
-#define BUNDLEALIGN "\n"
-#define MEMACCESS(base) "(%" #base ")"
-#define MEMACCESS2(offset, base) #offset "(%" #base ")"
-#define MEMLEA(offset, base) #offset "(%" #base ")"
-#define MEMLEA3(offset, index, scale) \
-    #offset "(,%" #index "," #scale ")"
-#define MEMLEA4(offset, base, index, scale) \
-    #offset "(%" #base ",%" #index "," #scale ")"
-#define MEMMOVESTRING(s, d)
-#define MEMSTORESTRING(reg, d)
-#define MEMOPREG(opcode, offset, base, index, scale, reg) \
-    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
-#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
-    #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
-#define MEMOPARG(opcode, offset, base, index, scale, arg) \
-    #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
-#endif
-
-void I444ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        int width);
-void I422ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        int width);
-void I411ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        int width);
-void I422ToBGRARow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_bgra,
-                        int width);
-void I422ToABGRRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_abgr,
-                        int width);
-void I422ToRGBARow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_rgba,
-                        int width);
-void I422ToRGB24Row_NEON(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_rgb24,
-                         int width);
-void I422ToRAWRow_NEON(const uint8* src_y,
-                       const uint8* src_u,
-                       const uint8* src_v,
-                       uint8* dst_raw,
-                       int width);
-void I422ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_u,
-                          const uint8* src_v,
-                          uint8* dst_rgb565,
-                          int width);
-void I422ToARGB1555Row_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb1555,
-                            int width);
-void I422ToARGB4444Row_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb4444,
-                            int width);
-void NV12ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_uv,
-                        uint8* dst_argb,
-                        int width);
-void NV21ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_vu,
-                        uint8* dst_argb,
-                        int width);
-void NV12ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_uv,
-                          uint8* dst_rgb565,
-                          int width);
-void NV21ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_vu,
-                          uint8* dst_rgb565,
-                          int width);
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
-                        uint8* dst_argb,
-                        int width);
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
-                        uint8* dst_argb,
-                        int width);
-
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix);
-void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix);
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix);
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
-                       uint8* dst_u, uint8* dst_v, int pix);
-void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
-                     uint8* dst_u, uint8* dst_v, int pix);
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
-                        uint8* dst_u, uint8* dst_v, int pix);
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
-void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
-void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
-void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
-void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
-void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
-void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
-void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
-void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
-void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
-void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
-
-void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
-                          uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
-                        uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
-                       uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
-                       uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
-                       uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
-                                 uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
-                                  uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
-                                 uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
-                                 uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
-                                 uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
-                           uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
-                            uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
-                           uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
-                           uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
-                           uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                             int pix);
-void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                             int pix);
-void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                             int pix);
-void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
-                           uint8* dst_u, uint8* dst_v, int pix);
-void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
-                           uint8* dst_u, uint8* dst_v, int pix);
-void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
-                         uint8* dst_u, uint8* dst_v, int pix);
-void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
-                            uint8* dst_u, uint8* dst_v, int pix);
-void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
-                              int src_stride_argb1555,
-                              uint8* dst_u, uint8* dst_v, int pix);
-void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
-                              int src_stride_argb4444,
-                              uint8* dst_u, uint8* dst_v, int pix);
-void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
-                   uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_u, uint8* dst_v, int width);
-void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
-                   uint8* dst_u, uint8* dst_v, int width);
-void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
-                   uint8* dst_u, uint8* dst_v, int width);
-void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
-                   uint8* dst_u, uint8* dst_v, int width);
-void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
-                    uint8* dst_u, uint8* dst_v, int width);
-void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
-                  uint8* dst_u, uint8* dst_v, int width);
-void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
-                     uint8* dst_u, uint8* dst_v, int width);
-void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
-                       uint8* dst_u, uint8* dst_v, int width);
-void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
-                       uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV444Row_SSSE3(const uint8* src_argb,
-                          uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
-                                    uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
-                              uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV422Row_SSSE3(const uint8* src_argb,
-                          uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
-                                    uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
-                              uint8* dst_u, uint8* dst_v, int width);
-
-void ARGBToUV444Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV422Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width);
-void ARGBToUV411Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width);
-
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
-void MirrorRow_C(const uint8* src, uint8* dst, int width);
-
-void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                       int width);
-void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                      int width);
-void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int width);
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                   int width);
-
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
-
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                           int pix);
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                               int pix);
-void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
-                                     uint8* dst_v, int pix);
-void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                         int pix);
-void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                         int pix);
-void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                         int pix);
-void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                               int pix);
-
-void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width);
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                     int width);
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                     int width);
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                     int width);
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
-                               uint8* dst_uv, int width);
-void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                         int width);
-void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                         int width);
-void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                         int width);
-
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
-void CopyRow_X86(const uint8* src, uint8* dst, int count);
-void CopyRow_NEON(const uint8* src, uint8* dst, int count);
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
-void CopyRow_C(const uint8* src, uint8* dst, int count);
-
-void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-
-void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
-
-void SetRow_X86(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
-                     int dst_stride, int height);
-void SetRow_NEON(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
-                      int dst_stride, int height);
-void SetRow_C(uint8* dst, uint32 v32, int count);
-void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
-                   int height);
-
-// ARGBShufflers for BGRAToARGB etc.
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
-                      const uint8* shuffler, int pix);
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix);
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                          const uint8* shuffler, int pix);
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix);
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix);
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                    const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
-                             const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                              const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
-                             const uint8* shuffler, int pix);
-void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
-                             const uint8* shuffler, int pix);
-
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
-                            int pix);
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
-                            int pix);
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
-                            int pix);
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
-                            int pix);
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
-void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
-void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
-                              int pix);
-void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
-                                int pix);
-void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
-                                int pix);
-void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
-void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
-void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
-                              int pix);
-void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
-                                int pix);
-void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
-                                int pix);
-
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
-void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
-
-void I444ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_argb,
-                     int width);
-void I422ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_argb,
-                     int width);
-void I411ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_argb,
-                     int width);
-void NV12ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_uv,
-                     uint8* dst_argb,
-                     int width);
-void NV21ToRGB565Row_C(const uint8* src_y,
-                       const uint8* src_vu,
-                       uint8* dst_argb,
-                       int width);
-void NV12ToRGB565Row_C(const uint8* src_y,
-                       const uint8* src_uv,
-                       uint8* dst_argb,
-                       int width);
-void NV21ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_vu,
-                     uint8* dst_argb,
-                     int width);
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
-                     uint8* dst_argb,
-                     int width);
-void UYVYToARGBRow_C(const uint8* src_uyvy,
-                     uint8* dst_argb,
-                     int width);
-void I422ToBGRARow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_bgra,
-                     int width);
-void I422ToABGRRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_abgr,
-                     int width);
-void I422ToRGBARow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_rgba,
-                     int width);
-void I422ToRGB24Row_C(const uint8* src_y,
-                      const uint8* src_u,
-                      const uint8* src_v,
-                      uint8* dst_rgb24,
-                      int width);
-void I422ToRAWRow_C(const uint8* src_y,
-                    const uint8* src_u,
-                    const uint8* src_v,
-                    uint8* dst_raw,
-                    int width);
-void I422ToARGB4444Row_C(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb4444,
-                         int width);
-void I422ToARGB1555Row_C(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb4444,
-                         int width);
-void I422ToRGB565Row_C(const uint8* src_y,
-                       const uint8* src_u,
-                       const uint8* src_v,
-                       uint8* dst_rgb565,
-                       int width);
-void YToARGBRow_C(const uint8* src_y,
-                  uint8* dst_argb,
-                  int width);
-void I422ToARGBRow_AVX2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        int width);
-void I444ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
-                         int width);
-void I422ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
-                         int width);
-void I411ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb,
-                         int width);
-void NV12ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_uv,
-                         uint8* dst_argb,
-                         int width);
-void NV21ToARGBRow_SSSE3(const uint8* src_y,
-                         const uint8* src_vu,
-                         uint8* dst_argb,
-                         int width);
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_uv,
-                           uint8* dst_argb,
-                           int width);
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_vu,
-                           uint8* dst_argb,
-                           int width);
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
-                         uint8* dst_argb,
-                         int width);
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
-                         uint8* dst_argb,
-                         int width);
-void I422ToBGRARow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_bgra,
-                         int width);
-void I422ToABGRRow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_abgr,
-                         int width);
-void I422ToRGBARow_SSSE3(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_rgba,
-                         int width);
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
-                             int width);
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
-                             int width);
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_u,
-                           const uint8* src_v,
-                           uint8* dst_argb,
-                           int width);
-// RGB24/RAW are unaligned.
-void I422ToRGB24Row_SSSE3(const uint8* src_y,
-                          const uint8* src_u,
-                          const uint8* src_v,
-                          uint8* dst_rgb24,
-                          int width);
-void I422ToRAWRow_SSSE3(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_raw,
-                        int width);
-
-void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_u,
-                                   const uint8* src_v,
-                                   uint8* dst_argb,
-                                   int width);
-void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_u,
-                                   const uint8* src_v,
-                                   uint8* dst_argb,
-                                   int width);
-void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_u,
-                                   const uint8* src_v,
-                                   uint8* dst_argb,
-                                   int width);
-void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_uv,
-                                   uint8* dst_argb,
-                                   int width);
-void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_vu,
-                                   uint8* dst_argb,
-                                   int width);
-void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
-                                   uint8* dst_argb,
-                                   int width);
-void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
-                                   uint8* dst_argb,
-                                   int width);
-void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_u,
-                                   const uint8* src_v,
-                                   uint8* dst_bgra,
-                                   int width);
-void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_u,
-                                   const uint8* src_v,
-                                   uint8* dst_abgr,
-                                   int width);
-void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
-                                   const uint8* src_u,
-                                   const uint8* src_v,
-                                   uint8* dst_rgba,
-                                   int width);
-void I422ToARGBRow_Any_AVX2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
-                             int width);
-void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
-                             int width);
-void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
-                             int width);
-void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_uv,
-                             uint8* dst_argb,
-                             int width);
-void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_vu,
-                             uint8* dst_argb,
-                             int width);
-void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
-                               const uint8* src_uv,
-                               uint8* dst_argb,
-                               int width);
-void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
-                               const uint8* src_vu,
-                               uint8* dst_argb,
-                               int width);
-void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
-                             uint8* dst_argb,
-                             int width);
-void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
-                             uint8* dst_argb,
-                             int width);
-void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_bgra,
-                             int width);
-void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_abgr,
-                             int width);
-void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_rgba,
-                             int width);
-void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
-                                 const uint8* src_u,
-                                 const uint8* src_v,
-                                 uint8* dst_rgba,
-                                 int width);
-void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
-                                 const uint8* src_u,
-                                 const uint8* src_v,
-                                 uint8* dst_rgba,
-                                 int width);
-void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
-                               const uint8* src_u,
-                               const uint8* src_v,
-                               uint8* dst_rgba,
-                               int width);
-// RGB24/RAW are unaligned.
-void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void YToARGBRow_SSE2(const uint8* src_y,
-                     uint8* dst_argb,
-                     int width);
-void YToARGBRow_NEON(const uint8* src_y,
-                     uint8* dst_argb,
-                     int width);
-void YToARGBRow_Any_SSE2(const uint8* src_y,
-                         uint8* dst_argb,
-                         int width);
-void YToARGBRow_Any_NEON(const uint8* src_y,
-                         uint8* dst_argb,
-                         int width);
-
-// ARGB preattenuated alpha blend.
-void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
-                        uint8* dst_argb, int width);
-void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width);
-void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width);
-void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
-                    uint8* dst_argb, int width);
-
-// ARGB multiply images. Same API as Blend, but these require
-// pointer and width alignment for SSE2.
-void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width);
-void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
-                          uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
-                              uint8* dst_argb, int width);
-void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
-                          uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
-                              uint8* dst_argb, int width);
-void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
-                          uint8* dst_argb, int width);
-void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
-                              uint8* dst_argb, int width);
-
-// ARGB add images.
-void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
-                  uint8* dst_argb, int width);
-void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
-                     uint8* dst_argb, int width);
-void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
-                         uint8* dst_argb, int width);
-void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
-                     uint8* dst_argb, int width);
-void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
-                         uint8* dst_argb, int width);
-void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
-                     uint8* dst_argb, int width);
-void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
-                         uint8* dst_argb, int width);
-
-// ARGB subtract images. Same API as Blend, but these require
-// pointer and width alignment for SSE2.
-void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width);
-void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
-                          uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
-                              uint8* dst_argb, int width);
-void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
-                          uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
-                              uint8* dst_argb, int width);
-void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
-                          uint8* dst_argb, int width);
-void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
-                              uint8* dst_argb, int width);
-
-void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
-
-void I444ToARGBRow_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void I422ToARGBRow_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void I411ToARGBRow_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void I422ToBGRARow_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void I422ToABGRRow_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void I422ToRGBARow_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb,
-                            int width);
-void I422ToRGB24Row_Any_NEON(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* dst_argb,
-                             int width);
-void I422ToRAWRow_Any_NEON(const uint8* src_y,
-                           const uint8* src_u,
-                           const uint8* src_v,
-                           uint8* dst_argb,
-                           int width);
-void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
-                                const uint8* src_u,
-                                const uint8* src_v,
-                                uint8* dst_argb,
-                                int width);
-void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
-                                const uint8* src_u,
-                                const uint8* src_v,
-                                uint8* dst_argb,
-                                int width);
-void I422ToRGB565Row_Any_NEON(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-void NV12ToARGBRow_Any_NEON(const uint8* src_y,
-                            const uint8* src_uv,
-                            uint8* dst_argb,
-                            int width);
-void NV21ToARGBRow_Any_NEON(const uint8* src_y,
-                            const uint8* src_uv,
-                            uint8* dst_argb,
-                            int width);
-void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
-                              const uint8* src_uv,
-                              uint8* dst_argb,
-                              int width);
-void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
-                              const uint8* src_uv,
-                              uint8* dst_argb,
-                              int width);
-void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
-                            uint8* dst_argb,
-                            int width);
-void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
-                            uint8* dst_argb,
-                            int width);
-void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
-                              const uint8* src_u,
-                              const uint8* src_v,
-                              uint8* dst_argb,
-                              int width);
-
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
-                               uint8* dst_y, int pix);
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
-                                uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
-                                   uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
-                   uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
-                             uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
-                             uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
-void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
-                             uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
-                               uint8* dst_y, int pix);
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
-                                uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
-                                   uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_NEON(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix);
-
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
-                   uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_C(const uint8* src_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
-                             uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
-                             uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
-void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
-                          uint8* dst_u, uint8* dst_v, int pix);
-void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
-                             uint8* dst_u, uint8* dst_v, int pix);
-
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
-               uint8* dst_uv, int pix);
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix);
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix);
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix);
-
-void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
-                      uint32 selector, int pix);
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
-                          uint32 selector, int pix);
-void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
-                         uint32 selector, int pix);
-void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
-                              uint32 selector, int pix);
-void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
-                             uint32 selector, int pix);
-void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
-                        uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
-                           uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
-                           uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
-                               uint32 /* selector */, int pix);
-void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
-                               uint32 /* selector */, int pix);
-
-void I422ToYUY2Row_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_yuy2, int width);
-void I422ToUYVYRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_uyvy, int width);
-void I422ToYUY2Row_SSE2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_yuy2, int width);
-void I422ToUYVYRow_SSE2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_uyvy, int width);
-void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_yuy2, int width);
-void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_uyvy, int width);
-void I422ToYUY2Row_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_yuy2, int width);
-void I422ToUYVYRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_uyvy, int width);
-void I422ToYUY2Row_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_yuy2, int width);
-void I422ToUYVYRow_Any_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_uyvy, int width);
-
-// Effects related row functions.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
-                               int width);
-void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                int width);
-void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
-                               int width);
-void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
-                               int width);
-
-// Inverse table for unattenuate, shared by C and SSE2.
-extern const uint32 fixed_invtbl8[256];
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
-                                 int width);
-void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
-                                 int width);
-
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
-
-void ARGBSepiaRow_C(uint8* dst_argb, int width);
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
-
-void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
-                          const int8* matrix_argb, int width);
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                              const int8* matrix_argb, int width);
-void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
-                             const int8* matrix_argb, int width);
-
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
-
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
-
-void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
-                       int interval_offset, int width);
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
-                          int interval_offset, int width);
-void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
-                          int interval_offset, int width);
-
-void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
-                    uint32 value);
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
-                       uint32 value);
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
-                       uint32 value);
-
-// Used for blur.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
-                                    int width, int area, uint8* dst, int count);
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
-                                  const int32* previous_cumsum, int width);
-
-void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
-                                 int width, int area, uint8* dst, int count);
-void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
-                               const int32* previous_cumsum, int width);
-
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
-                     uint8* dst_argb, const float* uv_dudv, int width);
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
-                        uint8* dst_argb, const float* uv_dudv, int width);
-
-// Used for I420Scale, ARGBScale, and ARGBInterpolate.
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
-                      ptrdiff_t src_stride_ptr,
-                      int width, int source_y_fraction);
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                         ptrdiff_t src_stride_ptr, int width,
-                         int source_y_fraction);
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                          ptrdiff_t src_stride_ptr, int width,
-                          int source_y_fraction);
-void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
-                         ptrdiff_t src_stride_ptr, int width,
-                         int source_y_fraction);
-void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
-                         ptrdiff_t src_stride_ptr, int width,
-                         int source_y_fraction);
-void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
-                                ptrdiff_t src_stride_ptr, int width,
-                                int source_y_fraction);
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                                   ptrdiff_t src_stride_ptr, int width,
-                                   int source_y_fraction);
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                                    ptrdiff_t src_stride_ptr, int width,
-                                    int source_y_fraction);
-void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
-                             ptrdiff_t src_stride_ptr, int width,
-                             int source_y_fraction);
-void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                             ptrdiff_t src_stride_ptr, int width,
-                             int source_y_fraction);
-void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                              ptrdiff_t src_stride_ptr, int width,
-                              int source_y_fraction);
-void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
-                             ptrdiff_t src_stride_ptr, int width,
-                             int source_y_fraction);
-void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
-                                    ptrdiff_t src_stride_ptr, int width,
-                                    int source_y_fraction);
-
-// Sobel images.
-void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
-                 uint8* dst_sobelx, int width);
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobelx, int width);
-void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobelx, int width);
-void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
-                 uint8* dst_sobely, int width);
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width);
-void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width);
-void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                uint8* dst_argb, int width);
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                   uint8* dst_argb, int width);
-void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
-                   uint8* dst_argb, int width);
-void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                       uint8* dst_y, int width);
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                          uint8* dst_y, int width);
-void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
-                          uint8* dst_y, int width);
-void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                  uint8* dst_argb, int width);
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                     uint8* dst_argb, int width);
-void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
-                     uint8* dst_argb, int width);
-
-void ARGBPolynomialRow_C(const uint8* src_argb,
-                         uint8* dst_argb, const float* poly,
-                         int width);
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
-                            uint8* dst_argb, const float* poly,
-                            int width);
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
-                            uint8* dst_argb, const float* poly,
-                            int width);
-
-void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
-                             const uint8* luma, uint32 lumacoeff);
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                 int width,
-                                 const uint8* luma, uint32 lumacoeff);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h
deleted file mode 100755
index 592b8ed5fa..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_H_  // NOLINT
-#define INCLUDE_LIBYUV_SCALE_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Supported filtering.
-typedef enum FilterMode {
-  kFilterNone = 0,  // Point sample; Fastest.
-  kFilterLinear = 1,  // Filter horizontally only.
-  kFilterBilinear = 2,  // Faster than box, but lower quality scaling down.
-  kFilterBox = 3  // Highest quality.
-} FilterModeEnum;
-
-// Scale a YUV plane.
-LIBYUV_API
-void ScalePlane(const uint8* src, int src_stride,
-                int src_width, int src_height,
-                uint8* dst, int dst_stride,
-                int dst_width, int dst_height,
-                enum FilterMode filtering);
-
-// Scales a YUV 4:2:0 image from the src width and height to the
-// dst width and height.
-// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
-// used. This produces basic (blocky) quality at the fastest speed.
-// If filtering is kFilterBilinear, interpolation is used to produce a better
-// quality image, at the expense of speed.
-// If filtering is kFilterBox, averaging is used to produce ever better
-// quality image, at further expense of speed.
-// Returns 0 if successful.
-
-LIBYUV_API
-int I420Scale(const uint8* src_y, int src_stride_y,
-              const uint8* src_u, int src_stride_u,
-              const uint8* src_v, int src_stride_v,
-              int src_width, int src_height,
-              uint8* dst_y, int dst_stride_y,
-              uint8* dst_u, int dst_stride_u,
-              uint8* dst_v, int dst_stride_v,
-              int dst_width, int dst_height,
-              enum FilterMode filtering);
-
-#ifdef __cplusplus
-// Legacy API.  Deprecated.
-LIBYUV_API
-int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
-          int src_stride_y, int src_stride_u, int src_stride_v,
-          int src_width, int src_height,
-          uint8* dst_y, uint8* dst_u, uint8* dst_v,
-          int dst_stride_y, int dst_stride_u, int dst_stride_v,
-          int dst_width, int dst_height,
-          LIBYUV_BOOL interpolate);
-
-// Legacy API.  Deprecated.
-LIBYUV_API
-int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
-                uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
-                LIBYUV_BOOL interpolate);
-
-// For testing, allow disabling of specialized scalers.
-LIBYUV_API
-void SetUseReferenceImpl(LIBYUV_BOOL use);
-#endif  // __cplusplus
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_SCALE_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h
deleted file mode 100755
index 0c9b362575..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_  // NOLINT
-#define INCLUDE_LIBYUV_SCALE_ARGB_H_
-
-#include "libyuv/basic_types.h"
-#include "libyuv/scale.h"  // For FilterMode
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-LIBYUV_API
-int ARGBScale(const uint8* src_argb, int src_stride_argb,
-              int src_width, int src_height,
-              uint8* dst_argb, int dst_stride_argb,
-              int dst_width, int dst_height,
-              enum FilterMode filtering);
-
-// Clipped scale takes destination rectangle coordinates for clip values.
-LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
-                  int src_width, int src_height,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int dst_width, int dst_height,
-                  int clip_x, int clip_y, int clip_width, int clip_height,
-                  enum FilterMode filtering);
-
-// TODO(fbarchard): Implement this.
-// Scale with YUV conversion to ARGB and clipping.
-LIBYUV_API
-int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
-                       const uint8* src_u, int src_stride_u,
-                       const uint8* src_v, int src_stride_v,
-                       uint32 src_fourcc,
-                       int src_width, int src_height,
-                       uint8* dst_argb, int dst_stride_argb,
-                       uint32 dst_fourcc,
-                       int dst_width, int dst_height,
-                       int clip_x, int clip_y, int clip_width, int clip_height,
-                       enum FilterMode filtering);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_SCALE_ARGB_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h
deleted file mode 100644
index 13eccc4d77..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_  // NOLINT
-#define INCLUDE_LIBYUV_SCALE_ROW_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
-    defined(TARGET_IPHONE_SIMULATOR)
-#define LIBYUV_DISABLE_X86
-#endif
-
-// The following are available on all x86 platforms:
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SCALEROWDOWN2_SSE2
-#define HAS_SCALEROWDOWN4_SSE2
-#define HAS_SCALEROWDOWN34_SSSE3
-#define HAS_SCALEROWDOWN38_SSSE3
-#define HAS_SCALEADDROWS_SSE2
-#define HAS_SCALEFILTERCOLS_SSSE3
-#define HAS_SCALECOLSUP2_SSE2
-#define HAS_SCALEARGBROWDOWN2_SSE2
-#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-#define HAS_SCALEARGBCOLS_SSE2
-#define HAS_SCALEARGBFILTERCOLS_SSSE3
-#define HAS_SCALEARGBCOLSUP2_SSE2
-#define HAS_FIXEDDIV_X86
-#define HAS_FIXEDDIV1_X86
-#endif
-
-// The following are available on Neon platforms:
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_SCALEROWDOWN2_NEON
-#define HAS_SCALEROWDOWN4_NEON
-#define HAS_SCALEROWDOWN34_NEON
-#define HAS_SCALEROWDOWN38_NEON
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-#define HAS_SCALEARGBROWDOWN2_NEON
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
-    defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_SCALEROWDOWN2_MIPS_DSPR2
-#define HAS_SCALEROWDOWN4_MIPS_DSPR2
-#define HAS_SCALEROWDOWN34_MIPS_DSPR2
-#define HAS_SCALEROWDOWN38_MIPS_DSPR2
-#endif
-
-// Scale ARGB vertically with bilinear interpolation.
-void ScalePlaneVertical(int src_height,
-                        int dst_width, int dst_height,
-                        int src_stride, int dst_stride,
-                        const uint8* src_argb, uint8* dst_argb,
-                        int x, int y, int dy,
-                        int bpp, enum FilterMode filtering);
-
-// Simplify the filtering based on scale factors.
-enum FilterMode ScaleFilterReduce(int src_width, int src_height,
-                                  int dst_width, int dst_height,
-                                  enum FilterMode filtering);
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_C(int num, int div);
-int FixedDiv_X86(int num, int div);
-// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
-int FixedDiv1_C(int num, int div);
-int FixedDiv1_X86(int num, int div);
-#ifdef HAS_FIXEDDIV_X86
-#define FixedDiv FixedDiv_X86
-#define FixedDiv1 FixedDiv1_X86
-#else
-#define FixedDiv FixedDiv_C
-#define FixedDiv1 FixedDiv1_C
-#endif
-
-// Compute slope values for stepping.
-void ScaleSlope(int src_width, int src_height,
-                int dst_width, int dst_height,
-                enum FilterMode filtering,
-                int* x, int* y, int* dx, int* dy);
-
-void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                     uint8* dst, int dst_width);
-void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst, int dst_width);
-void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst, int dst_width);
-void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                     uint8* dst, int dst_width);
-void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst, int dst_width);
-void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                      uint8* dst, int dst_width);
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* d, int dst_width);
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* d, int dst_width);
-void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
-                 int dst_width, int x, int dx);
-void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
-                    int dst_width, int, int);
-void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
-                       int dst_width, int x, int dx);
-void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
-                         int dst_width, int x, int dx);
-void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                      uint8* dst, int dst_width);
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
-                            ptrdiff_t src_stride,
-                            uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* dst_ptr, int dst_width);
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                    uint16* dst_ptr, int src_width, int src_height);
-void ScaleARGBRowDown2_C(const uint8* src_argb,
-                         ptrdiff_t src_stride,
-                         uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
-                            uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
-                            int src_stepx,
-                            uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               int src_stepx,
-                               uint8* dst_argb, int dst_width);
-void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
-                     int dst_width, int x, int dx);
-void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
-                       int dst_width, int x, int dx);
-void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
-                        int dst_width, int, int);
-void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
-                           int dst_width, int x, int dx);
-void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
-                             int dst_width, int x, int dx);
-
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
-                                  ptrdiff_t src_stride,
-                                  uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
-                                        ptrdiff_t src_stride,
-                                        uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
-                                     ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
-                          uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
-                          uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width);
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                       uint16* dst_ptr, int src_width,
-                       int src_height);
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                           int dst_width, int x, int dx);
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                       int dst_width, int x, int dx);
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
-                            ptrdiff_t src_stride,
-                            uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride,
-                                  uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
-                               int src_stepx,
-                               uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride,
-                                  int src_stepx,
-                                  uint8* dst_argb, int dst_width);
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
-                        int dst_width, int x, int dx);
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
-                               int dst_width, int x, int dx);
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
-                           int dst_width, int x, int dx);
-// Row functions.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride,
-                               int src_stepx,
-                               uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, int src_stride,
-                                  int src_stepx,
-                                  uint8* dst_argb, int dst_width);
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* dst, int dst_width);
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width);
-
-// ScaleRowDown2Box also used by planar functions
-// NEON downscalers with interpolation.
-
-// Note - not static due to reuse in convert for 444 to 420.
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst, int dst_width);
-
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst, int dst_width);
-
-void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-
-// Down scale from 4 to 3 pixels. Use the neon multilane read/write
-//  to load up the every 4th pixel into a 4 different registers.
-// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
-                         ptrdiff_t src_stride,
-                         uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
-                               ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width);
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
-                               ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width);
-
-// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
-                         ptrdiff_t src_stride,
-                         uint8* dst_ptr, int dst_width);
-// 32x3 -> 12x1
-void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
-                               ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width);
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
-                               ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width);
-
-void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst, int dst_width);
-void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                 uint8* dst, int dst_width);
-void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst, int dst_width);
-void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                 uint8* dst, int dst_width);
-void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width);
-void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* d, int dst_width);
-void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* d, int dst_width);
-void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width);
-void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
-                                     ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_SCALE_ROW_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h
deleted file mode 100755
index 4881861866..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
-#define INCLUDE_LIBYUV_VERSION_H_
-
-#define LIBYUV_VERSION 998
-
-#endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h
deleted file mode 100755
index 039efb96d1..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-// Common definitions for video, including fourcc and VideoFormat.
-
-#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_  // NOLINT
-#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-// Definition of FourCC codes
-//////////////////////////////////////////////////////////////////////////////
-
-// Convert four characters to a FourCC code.
-// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
-// constants are used in a switch.
-#ifdef __cplusplus
-#define FOURCC(a, b, c, d) ( \
-    (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
-    (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
-#else
-#define FOURCC(a, b, c, d) ( \
-    ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
-    ((uint32)(c) << 16) | ((uint32)(d) << 24))  /* NOLINT */
-#endif
-
-// Some pages discussing FourCC codes:
-//   http://www.fourcc.org/yuv.php
-//   http://v4l2spec.bytesex.org/spec/book1.htm
-//   http://developer.apple.com/quicktime/icefloe/dispatch020.html
-//   http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
-//   http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
-
-// FourCC codes grouped according to implementation efficiency.
-// Primary formats should convert in 1 efficient step.
-// Secondary formats are converted in 2 steps.
-// Auxilliary formats call primary converters.
-enum FourCC {
-  // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
-  FOURCC_I420 = FOURCC('I', '4', '2', '0'),
-  FOURCC_I422 = FOURCC('I', '4', '2', '2'),
-  FOURCC_I444 = FOURCC('I', '4', '4', '4'),
-  FOURCC_I411 = FOURCC('I', '4', '1', '1'),
-  FOURCC_I400 = FOURCC('I', '4', '0', '0'),
-  FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
-  FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
-  FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
-  FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
-
-  // 2 Secondary YUV formats: row biplanar.
-  FOURCC_M420 = FOURCC('M', '4', '2', '0'),
-  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
-
-  // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
-  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
-  FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
-  FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
-  FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
-  FOURCC_RAW  = FOURCC('r', 'a', 'w', ' '),
-  FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
-  FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'),  // rgb565 LE.
-  FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'),  // argb1555 LE.
-  FOURCC_R444 = FOURCC('R', '4', '4', '4'),  // argb4444 LE.
-
-  // 4 Secondary RGB formats: 4 Bayer Patterns.
-  FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
-  FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
-  FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
-  FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
-
-  // 1 Primary Compressed YUV format.
-  FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
-
-  // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
-  FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
-  FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
-  FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
-  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
-  FOURCC_J420 = FOURCC('J', '4', '2', '0'),
-  FOURCC_J400 = FOURCC('J', '4', '0', '0'),
-
-  // 14 Auxiliary aliases.  CanonicalFourCC() maps these to canonical fourcc.
-  FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420.
-  FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'),  // Alias for I422.
-  FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'),  // Alias for I444.
-  FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'),  // Alias for YUY2.
-  FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'),  // Alias for YUY2 on Mac.
-  FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'),  // Alias for UYVY.
-  FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'),  // Alias for UYVY on Mac.
-  FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'),  // Alias for MJPG.
-  FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'),  // Alias for MJPG on Mac.
-  FOURCC_BA81 = FOURCC('B', 'A', '8', '1'),  // Alias for BGGR.
-  FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'),  // Alias for RAW.
-  FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'),  // Alias for 24BG.
-  FOURCC_CM32 = FOURCC(0, 0, 0, 32),  // Alias for BGRA kCMPixelFormat_32ARGB
-  FOURCC_CM24 = FOURCC(0, 0, 0, 24),  // Alias for RAW kCMPixelFormat_24RGB
-  FOURCC_L555 = FOURCC('L', '5', '5', '5'),  // Alias for RGBO.
-  FOURCC_L565 = FOURCC('L', '5', '6', '5'),  // Alias for RGBP.
-  FOURCC_5551 = FOURCC('5', '5', '5', '1'),  // Alias for RGBO.
-
-  // 1 Auxiliary compressed YUV format set aside for capturer.
-  FOURCC_H264 = FOURCC('H', '2', '6', '4'),
-
-  // Match any fourcc.
-  FOURCC_ANY  = 0xFFFFFFFF,
-};
-
-enum FourCCBpp {
-  // Canonical fourcc codes used in our code.
-  FOURCC_BPP_I420 = 12,
-  FOURCC_BPP_I422 = 16,
-  FOURCC_BPP_I444 = 24,
-  FOURCC_BPP_I411 = 12,
-  FOURCC_BPP_I400 = 8,
-  FOURCC_BPP_NV21 = 12,
-  FOURCC_BPP_NV12 = 12,
-  FOURCC_BPP_YUY2 = 16,
-  FOURCC_BPP_UYVY = 16,
-  FOURCC_BPP_M420 = 12,
-  FOURCC_BPP_Q420 = 12,
-  FOURCC_BPP_ARGB = 32,
-  FOURCC_BPP_BGRA = 32,
-  FOURCC_BPP_ABGR = 32,
-  FOURCC_BPP_RGBA = 32,
-  FOURCC_BPP_24BG = 24,
-  FOURCC_BPP_RAW  = 24,
-  FOURCC_BPP_RGBP = 16,
-  FOURCC_BPP_RGBO = 16,
-  FOURCC_BPP_R444 = 16,
-  FOURCC_BPP_RGGB = 8,
-  FOURCC_BPP_BGGR = 8,
-  FOURCC_BPP_GRBG = 8,
-  FOURCC_BPP_GBRG = 8,
-  FOURCC_BPP_YV12 = 12,
-  FOURCC_BPP_YV16 = 16,
-  FOURCC_BPP_YV24 = 24,
-  FOURCC_BPP_YU12 = 12,
-  FOURCC_BPP_J420 = 12,
-  FOURCC_BPP_J400 = 8,
-  FOURCC_BPP_MJPG = 0,  // 0 means unknown.
-  FOURCC_BPP_H264 = 0,
-  FOURCC_BPP_IYUV = 12,
-  FOURCC_BPP_YU16 = 16,
-  FOURCC_BPP_YU24 = 24,
-  FOURCC_BPP_YUYV = 16,
-  FOURCC_BPP_YUVS = 16,
-  FOURCC_BPP_HDYC = 16,
-  FOURCC_BPP_2VUY = 16,
-  FOURCC_BPP_JPEG = 1,
-  FOURCC_BPP_DMB1 = 1,
-  FOURCC_BPP_BA81 = 8,
-  FOURCC_BPP_RGB3 = 24,
-  FOURCC_BPP_BGR3 = 24,
-  FOURCC_BPP_CM32 = 32,
-  FOURCC_BPP_CM24 = 24,
-
-  // Match any fourcc.
-  FOURCC_BPP_ANY  = 0,  // 0 means unknown.
-};
-
-// Converts fourcc aliases into canonical ones.
-LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
-#endif  // INCLUDE_LIBYUV_VIDEO_COMMON_H_  NOLINT
diff --git a/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt b/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt
deleted file mode 100755
index 680e4a1c36..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-libyuv's source code is here provided in minimalist distribution format
-with all source files not needed for compiling libtheoraplayer removed.
-
-- The project files were modified to fit libtheoraplayer's binary output
-  folder structure.
-- Some project files missing in the original source distibution were added to support
-  compiling the libtheoraplayer on those platforms.
-- Also, some code may have been changed to address certain compiler/platform
-  specific problems and is so indicated in the source code.
-
-libyuv is owned and maintained by the Google Inc. and this distribution
-is present here only for convenience and easier compilation of libtheoraplayer.
-
-If you want to use libyuv outside of libtheoraplayer, it is encouraged to use the
-original source distribution by Google Inc: https://code.google.com/p/libyuv/
-\ No newline at end of file
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc
deleted file mode 100755
index 9ea81b4e21..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/compare.h"
-
-#include <float.h>
-#include <math.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
-
-// This module is for Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || \
-    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
-#define HAS_HASHDJB2_SSE41
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
-
-#if _MSC_VER >= 1700
-#define HAS_HASHDJB2_AVX2
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
-#endif
-
-#endif  // HAS_HASHDJB2_SSE41
-
-// hash seed of 5381 recommended.
-LIBYUV_API
-uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
-  const int kBlockSize = 1 << 15;  // 32768;
-  int remainder;
-  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
-#if defined(HAS_HASHDJB2_SSE41)
-  if (TestCpuFlag(kCpuHasSSE41)) {
-    HashDjb2_SSE = HashDjb2_SSE41;
-  }
-#endif
-#if defined(HAS_HASHDJB2_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    HashDjb2_SSE = HashDjb2_AVX2;
-  }
-#endif
-
-  while (count >= (uint64)(kBlockSize)) {
-    seed = HashDjb2_SSE(src, kBlockSize, seed);
-    src += kBlockSize;
-    count -= kBlockSize;
-  }
-  remainder = (int)(count) & ~15;
-  if (remainder) {
-    seed = HashDjb2_SSE(src, remainder, seed);
-    src += remainder;
-    count -= remainder;
-  }
-  remainder = (int)(count) & 15;
-  if (remainder) {
-    seed = HashDjb2_C(src, remainder, seed);
-  }
-  return seed;
-}
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_SUMSQUAREERROR_NEON
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
-#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SUMSQUAREERROR_SSE2
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-// Visual C 2012 required for AVX2.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
-#define HAS_SUMSQUAREERROR_AVX2
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-
-// TODO(fbarchard): Refactor into row function.
-LIBYUV_API
-uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
-                             int count) {
-  // SumSquareError returns values 0 to 65535 for each squared difference.
-  // Up to 65536 of those can be summed and remain within a uint32.
-  // After each block of 65536 pixels, accumulate into a uint64.
-  const int kBlockSize = 65536;
-  int remainder = count & (kBlockSize - 1) & ~31;
-  uint64 sse = 0;
-  int i;
-  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
-      SumSquareError_C;
-#if defined(HAS_SUMSQUAREERROR_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SumSquareError = SumSquareError_NEON;
-  }
-#endif
-#if defined(HAS_SUMSQUAREERROR_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
-    // Note only used for multiples of 16 so count is not checked.
-    SumSquareError = SumSquareError_SSE2;
-  }
-#endif
-#if defined(HAS_SUMSQUAREERROR_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    // Note only used for multiples of 32 so count is not checked.
-    SumSquareError = SumSquareError_AVX2;
-  }
-#endif
-#ifdef _OPENMP
-#pragma omp parallel for reduction(+: sse)
-#endif
-  for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
-    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
-  }
-  src_a += count & ~(kBlockSize - 1);
-  src_b += count & ~(kBlockSize - 1);
-  if (remainder) {
-    sse += SumSquareError(src_a, src_b, remainder);
-    src_a += remainder;
-    src_b += remainder;
-  }
-  remainder = count & 31;
-  if (remainder) {
-    sse += SumSquareError_C(src_a, src_b, remainder);
-  }
-  return sse;
-}
-
-LIBYUV_API
-uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
-                                  const uint8* src_b, int stride_b,
-                                  int width, int height) {
-  uint64 sse = 0;
-  int h;
-  // Coalesce rows.
-  if (stride_a == width &&
-      stride_b == width) {
-    width *= height;
-    height = 1;
-    stride_a = stride_b = 0;
-  }
-  for (h = 0; h < height; ++h) {
-    sse += ComputeSumSquareError(src_a, src_b, width);
-    src_a += stride_a;
-    src_b += stride_b;
-  }
-  return sse;
-}
-
-LIBYUV_API
-double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
-  double psnr;
-  if (sse > 0) {
-    double mse = (double)(count) / (double)(sse);
-    psnr = 10.0 * log10(255.0 * 255.0 * mse);
-  } else {
-    psnr = kMaxPsnr;      // Limit to prevent divide by 0
-  }
-
-  if (psnr > kMaxPsnr)
-    psnr = kMaxPsnr;
-
-  return psnr;
-}
-
-LIBYUV_API
-double CalcFramePsnr(const uint8* src_a, int stride_a,
-                     const uint8* src_b, int stride_b,
-                     int width, int height) {
-  const uint64 samples = width * height;
-  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
-                                                src_b, stride_b,
-                                                width, height);
-  return SumSquareErrorToPsnr(sse, samples);
-}
-
-LIBYUV_API
-double I420Psnr(const uint8* src_y_a, int stride_y_a,
-                const uint8* src_u_a, int stride_u_a,
-                const uint8* src_v_a, int stride_v_a,
-                const uint8* src_y_b, int stride_y_b,
-                const uint8* src_u_b, int stride_u_b,
-                const uint8* src_v_b, int stride_v_b,
-                int width, int height) {
-  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
-                                                  src_y_b, stride_y_b,
-                                                  width, height);
-  const int width_uv = (width + 1) >> 1;
-  const int height_uv = (height + 1) >> 1;
-  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
-                                                  src_u_b, stride_u_b,
-                                                  width_uv, height_uv);
-  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
-                                                  src_v_b, stride_v_b,
-                                                  width_uv, height_uv);
-  const uint64 samples = width * height + 2 * (width_uv * height_uv);
-  const uint64 sse = sse_y + sse_u + sse_v;
-  return SumSquareErrorToPsnr(sse, samples);
-}
-
-static const int64 cc1 =  26634;  // (64^2*(.01*255)^2
-static const int64 cc2 = 239708;  // (64^2*(.03*255)^2
-
-static double Ssim8x8_C(const uint8* src_a, int stride_a,
-                        const uint8* src_b, int stride_b) {
-  int64 sum_a = 0;
-  int64 sum_b = 0;
-  int64 sum_sq_a = 0;
-  int64 sum_sq_b = 0;
-  int64 sum_axb = 0;
-
-  int i;
-  for (i = 0; i < 8; ++i) {
-    int j;
-    for (j = 0; j < 8; ++j) {
-      sum_a += src_a[j];
-      sum_b += src_b[j];
-      sum_sq_a += src_a[j] * src_a[j];
-      sum_sq_b += src_b[j] * src_b[j];
-      sum_axb += src_a[j] * src_b[j];
-    }
-
-    src_a += stride_a;
-    src_b += stride_b;
-  }
-
-  {
-    const int64 count = 64;
-    // scale the constants by number of pixels
-    const int64 c1 = (cc1 * count * count) >> 12;
-    const int64 c2 = (cc2 * count * count) >> 12;
-
-    const int64 sum_a_x_sum_b = sum_a * sum_b;
-
-    const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
-                         (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
-
-    const int64 sum_a_sq = sum_a*sum_a;
-    const int64 sum_b_sq = sum_b*sum_b;
-
-    const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
-                         (count * sum_sq_a - sum_a_sq +
-                          count * sum_sq_b - sum_b_sq + c2);
-
-    if (ssim_d == 0.0) {
-      return DBL_MAX;
-    }
-    return ssim_n * 1.0 / ssim_d;
-  }
-}
-
-// We are using a 8x8 moving window with starting location of each 8x8 window
-// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
-// block boundaries to penalize blocking artifacts.
-LIBYUV_API
-double CalcFrameSsim(const uint8* src_a, int stride_a,
-                     const uint8* src_b, int stride_b,
-                     int width, int height) {
-  int samples = 0;
-  double ssim_total = 0;
-  double (*Ssim8x8)(const uint8* src_a, int stride_a,
-                    const uint8* src_b, int stride_b) = Ssim8x8_C;
-
-  // sample point start with each 4x4 location
-  int i;
-  for (i = 0; i < height - 8; i += 4) {
-    int j;
-    for (j = 0; j < width - 8; j += 4) {
-      ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
-      samples++;
-    }
-
-    src_a += stride_a * 4;
-    src_b += stride_b * 4;
-  }
-
-  ssim_total /= samples;
-  return ssim_total;
-}
-
-LIBYUV_API
-double I420Ssim(const uint8* src_y_a, int stride_y_a,
-                const uint8* src_u_a, int stride_u_a,
-                const uint8* src_v_a, int stride_v_a,
-                const uint8* src_y_b, int stride_y_b,
-                const uint8* src_u_b, int stride_u_b,
-                const uint8* src_v_b, int stride_v_b,
-                int width, int height) {
-  const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
-                                      src_y_b, stride_y_b, width, height);
-  const int width_uv = (width + 1) >> 1;
-  const int height_uv = (height + 1) >> 1;
-  const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
-                                      src_u_b, stride_u_b,
-                                      width_uv, height_uv);
-  const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
-                                      src_v_b, stride_v_b,
-                                      width_uv, height_uv);
-  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc
deleted file mode 100755
index c546b51829..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
-  uint32 sse = 0u;
-  int i;
-  for (i = 0; i < count; ++i) {
-    int diff = src_a[i] - src_b[i];
-    sse += (uint32)(diff * diff);
-  }
-  return sse;
-}
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
-  uint32 hash = seed;
-  int i;
-  for (i = 0; i < count; ++i) {
-    hash += (hash << 5) + src[i];
-  }
-  return hash;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc
deleted file mode 100755
index bb843a6ab8..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
-  volatile uint32 sse;
-  asm volatile (
-#ifdef _ANDROID
-	".fpu neon\n"
-#endif
-    "vmov.u8    q8, #0                         \n"
-    "vmov.u8    q10, #0                        \n"
-    "vmov.u8    q9, #0                         \n"
-    "vmov.u8    q11, #0                        \n"
-
-    ".p2align  2                               \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"
-    "vld1.8     {q1}, [%1]!                    \n"
-    "subs       %2, %2, #16                    \n"
-    "vsubl.u8   q2, d0, d2                     \n"
-    "vsubl.u8   q3, d1, d3                     \n"
-    "vmlal.s16  q8, d4, d4                     \n"
-    "vmlal.s16  q9, d6, d6                     \n"
-    "vmlal.s16  q10, d5, d5                    \n"
-    "vmlal.s16  q11, d7, d7                    \n"
-    "bgt        1b                             \n"
-
-    "vadd.u32   q8, q8, q9                     \n"
-    "vadd.u32   q10, q10, q11                  \n"
-    "vadd.u32   q11, q8, q10                   \n"
-    "vpaddl.u32 q1, q11                        \n"
-    "vadd.u64   d0, d2, d3                     \n"
-    "vmov.32    %3, d0[0]                      \n"
-    : "+r"(src_a),
-      "+r"(src_b),
-      "+r"(count),
-      "=r"(sse)
-    :
-    : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
-  return sse;
-}
-
-#endif  // __ARM_NEON__
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc
deleted file mode 100755
index ac361190e8..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
-  uint32 sse;
-  asm volatile (  // NOLINT
-    "pxor      %%xmm0,%%xmm0                   \n"
-    "pxor      %%xmm5,%%xmm5                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10, 0) ",%0          \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x10, 1) ",%1          \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm1,%%xmm3                   \n"
-    "psubusb   %%xmm2,%%xmm1                   \n"
-    "psubusb   %%xmm3,%%xmm2                   \n"
-    "por       %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm1,%%xmm2                   \n"
-    "punpcklbw %%xmm5,%%xmm1                   \n"
-    "punpckhbw %%xmm5,%%xmm2                   \n"
-    "pmaddwd   %%xmm1,%%xmm1                   \n"
-    "pmaddwd   %%xmm2,%%xmm2                   \n"
-    "paddd     %%xmm1,%%xmm0                   \n"
-    "paddd     %%xmm2,%%xmm0                   \n"
-    "jg        1b                              \n"
-
-    "pshufd    $0xee,%%xmm0,%%xmm1             \n"
-    "paddd     %%xmm1,%%xmm0                   \n"
-    "pshufd    $0x1,%%xmm0,%%xmm1              \n"
-    "paddd     %%xmm1,%%xmm0                   \n"
-    "movd      %%xmm0,%3                       \n"
-
-  : "+r"(src_a),      // %0
-    "+r"(src_b),      // %1
-    "+r"(count),      // %2
-    "=g"(sse)         // %3
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );  // NOLINT
-  return sse;
-}
-
-#endif  // defined(__x86_64__) || defined(__i386__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
-static uvec32 kHashMul0 = {
-  0x0c3525e1,  // 33 ^ 15
-  0xa3476dc1,  // 33 ^ 14
-  0x3b4039a1,  // 33 ^ 13
-  0x4f5f0981,  // 33 ^ 12
-};
-static uvec32 kHashMul1 = {
-  0x30f35d61,  // 33 ^ 11
-  0x855cb541,  // 33 ^ 10
-  0x040a9121,  // 33 ^ 9
-  0x747c7101,  // 33 ^ 8
-};
-static uvec32 kHashMul2 = {
-  0xec41d4e1,  // 33 ^ 7
-  0x4cfa3cc1,  // 33 ^ 6
-  0x025528a1,  // 33 ^ 5
-  0x00121881,  // 33 ^ 4
-};
-static uvec32 kHashMul3 = {
-  0x00008c61,  // 33 ^ 3
-  0x00000441,  // 33 ^ 2
-  0x00000021,  // 33 ^ 1
-  0x00000001,  // 33 ^ 0
-};
-
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
-  uint32 hash;
-  asm volatile (  // NOLINT
-    "movd      %2,%%xmm0                       \n"
-    "pxor      %%xmm7,%%xmm7                   \n"
-    "movdqa    %4,%%xmm6                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10, 0) ",%0          \n"
-    "pmulld    %%xmm6,%%xmm0                   \n"
-    "movdqa    %5,%%xmm5                       \n"
-    "movdqa    %%xmm1,%%xmm2                   \n"
-    "punpcklbw %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm2,%%xmm3                   \n"
-    "punpcklwd %%xmm7,%%xmm3                   \n"
-    "pmulld    %%xmm5,%%xmm3                   \n"
-    "movdqa    %6,%%xmm5                       \n"
-    "movdqa    %%xmm2,%%xmm4                   \n"
-    "punpckhwd %%xmm7,%%xmm4                   \n"
-    "pmulld    %%xmm5,%%xmm4                   \n"
-    "movdqa    %7,%%xmm5                       \n"
-    "punpckhbw %%xmm7,%%xmm1                   \n"
-    "movdqa    %%xmm1,%%xmm2                   \n"
-    "punpcklwd %%xmm7,%%xmm2                   \n"
-    "pmulld    %%xmm5,%%xmm2                   \n"
-    "movdqa    %8,%%xmm5                       \n"
-    "punpckhwd %%xmm7,%%xmm1                   \n"
-    "pmulld    %%xmm5,%%xmm1                   \n"
-    "paddd     %%xmm4,%%xmm3                   \n"
-    "paddd     %%xmm2,%%xmm1                   \n"
-    "sub       $0x10,%1                        \n"
-    "paddd     %%xmm3,%%xmm1                   \n"
-    "pshufd    $0xe,%%xmm1,%%xmm2              \n"
-    "paddd     %%xmm2,%%xmm1                   \n"
-    "pshufd    $0x1,%%xmm1,%%xmm2              \n"
-    "paddd     %%xmm2,%%xmm1                   \n"
-    "paddd     %%xmm1,%%xmm0                   \n"
-    "jg        1b                              \n"
-    "movd      %%xmm0,%3                       \n"
-  : "+r"(src),        // %0
-    "+r"(count),      // %1
-    "+rm"(seed),      // %2
-    "=g"(hash)        // %3
-  : "m"(kHash16x33),  // %4
-    "m"(kHashMul0),   // %5
-    "m"(kHashMul1),   // %6
-    "m"(kHashMul2),   // %7
-    "m"(kHashMul3)    // %8
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );  // NOLINT
-  return hash;
-}
-#endif  // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc
deleted file mode 100755
index 99831651f5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-__declspec(naked) __declspec(align(16))
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
-  __asm {
-    mov        eax, [esp + 4]    // src_a
-    mov        edx, [esp + 8]    // src_b
-    mov        ecx, [esp + 12]   // count
-    pxor       xmm0, xmm0
-    pxor       xmm5, xmm5
-
-    align      4
-  wloop:
-    movdqa     xmm1, [eax]
-    lea        eax,  [eax + 16]
-    movdqa     xmm2, [edx]
-    lea        edx,  [edx + 16]
-    sub        ecx, 16
-    movdqa     xmm3, xmm1  // abs trick
-    psubusb    xmm1, xmm2
-    psubusb    xmm2, xmm3
-    por        xmm1, xmm2
-    movdqa     xmm2, xmm1
-    punpcklbw  xmm1, xmm5
-    punpckhbw  xmm2, xmm5
-    pmaddwd    xmm1, xmm1
-    pmaddwd    xmm2, xmm2
-    paddd      xmm0, xmm1
-    paddd      xmm0, xmm2
-    jg         wloop
-
-    pshufd     xmm1, xmm0, 0xee
-    paddd      xmm0, xmm1
-    pshufd     xmm1, xmm0, 0x01
-    paddd      xmm0, xmm1
-    movd       eax, xmm0
-    ret
-  }
-}
-
-// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
-// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
-#pragma warning(disable: 4752)
-__declspec(naked) __declspec(align(16))
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
-  __asm {
-    mov        eax, [esp + 4]    // src_a
-    mov        edx, [esp + 8]    // src_b
-    mov        ecx, [esp + 12]   // count
-    vpxor      ymm0, ymm0, ymm0  // sum
-    vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
-    sub        edx, eax
-
-    align      4
-  wloop:
-    vmovdqu    ymm1, [eax]
-    vmovdqu    ymm2, [eax + edx]
-    lea        eax,  [eax + 32]
-    sub        ecx, 32
-    vpsubusb   ymm3, ymm1, ymm2  // abs difference trick
-    vpsubusb   ymm2, ymm2, ymm1
-    vpor       ymm1, ymm2, ymm3
-    vpunpcklbw ymm2, ymm1, ymm5  // u16.  mutates order.
-    vpunpckhbw ymm1, ymm1, ymm5
-    vpmaddwd   ymm2, ymm2, ymm2  // square + hadd to u32.
-    vpmaddwd   ymm1, ymm1, ymm1
-    vpaddd     ymm0, ymm0, ymm1
-    vpaddd     ymm0, ymm0, ymm2
-    jg         wloop
-
-    vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 both lanes.
-    vpaddd     ymm0, ymm0, ymm1
-    vpshufd    ymm1, ymm0, 0x01  // 1 + 0 both lanes.
-    vpaddd     ymm0, ymm0, ymm1
-    vpermq     ymm1, ymm0, 0x02  // high + low lane.
-    vpaddd     ymm0, ymm0, ymm1
-    vmovd      eax, xmm0
-    vzeroupper
-    ret
-  }
-}
-#endif  // _MSC_VER >= 1700
-
-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
-static uvec32 kHashMul0 = {
-  0x0c3525e1,  // 33 ^ 15
-  0xa3476dc1,  // 33 ^ 14
-  0x3b4039a1,  // 33 ^ 13
-  0x4f5f0981,  // 33 ^ 12
-};
-static uvec32 kHashMul1 = {
-  0x30f35d61,  // 33 ^ 11
-  0x855cb541,  // 33 ^ 10
-  0x040a9121,  // 33 ^ 9
-  0x747c7101,  // 33 ^ 8
-};
-static uvec32 kHashMul2 = {
-  0xec41d4e1,  // 33 ^ 7
-  0x4cfa3cc1,  // 33 ^ 6
-  0x025528a1,  // 33 ^ 5
-  0x00121881,  // 33 ^ 4
-};
-static uvec32 kHashMul3 = {
-  0x00008c61,  // 33 ^ 3
-  0x00000441,  // 33 ^ 2
-  0x00000021,  // 33 ^ 1
-  0x00000001,  // 33 ^ 0
-};
-
-// 27: 66 0F 38 40 C6     pmulld      xmm0,xmm6
-// 44: 66 0F 38 40 DD     pmulld      xmm3,xmm5
-// 59: 66 0F 38 40 E5     pmulld      xmm4,xmm5
-// 72: 66 0F 38 40 D5     pmulld      xmm2,xmm5
-// 83: 66 0F 38 40 CD     pmulld      xmm1,xmm5
-#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
-    _asm _emit 0x40 _asm _emit reg
-
-__declspec(naked) __declspec(align(16))
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
-  __asm {
-    mov        eax, [esp + 4]    // src
-    mov        ecx, [esp + 8]    // count
-    movd       xmm0, [esp + 12]  // seed
-
-    pxor       xmm7, xmm7        // constant 0 for unpck
-    movdqa     xmm6, kHash16x33
-
-    align      4
-  wloop:
-    movdqu     xmm1, [eax]       // src[0-15]
-    lea        eax, [eax + 16]
-    pmulld(0xc6)                 // pmulld      xmm0,xmm6  hash *= 33 ^ 16
-    movdqa     xmm5, kHashMul0
-    movdqa     xmm2, xmm1
-    punpcklbw  xmm2, xmm7        // src[0-7]
-    movdqa     xmm3, xmm2
-    punpcklwd  xmm3, xmm7        // src[0-3]
-    pmulld(0xdd)                 // pmulld     xmm3, xmm5
-    movdqa     xmm5, kHashMul1
-    movdqa     xmm4, xmm2
-    punpckhwd  xmm4, xmm7        // src[4-7]
-    pmulld(0xe5)                 // pmulld     xmm4, xmm5
-    movdqa     xmm5, kHashMul2
-    punpckhbw  xmm1, xmm7        // src[8-15]
-    movdqa     xmm2, xmm1
-    punpcklwd  xmm2, xmm7        // src[8-11]
-    pmulld(0xd5)                 // pmulld     xmm2, xmm5
-    movdqa     xmm5, kHashMul3
-    punpckhwd  xmm1, xmm7        // src[12-15]
-    pmulld(0xcd)                 // pmulld     xmm1, xmm5
-    paddd      xmm3, xmm4        // add 16 results
-    paddd      xmm1, xmm2
-    sub        ecx, 16
-    paddd      xmm1, xmm3
-
-    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
-    paddd      xmm1, xmm2
-    pshufd     xmm2, xmm1, 0x01
-    paddd      xmm1, xmm2
-    paddd      xmm0, xmm1
-    jg         wloop
-
-    movd       eax, xmm0         // return hash
-    ret
-  }
-}
-
-// Visual C 2012 required for AVX2.
-#if _MSC_VER >= 1700
-__declspec(naked) __declspec(align(16))
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
-  __asm {
-    mov        eax, [esp + 4]    // src
-    mov        ecx, [esp + 8]    // count
-    movd       xmm0, [esp + 12]  // seed
-    movdqa     xmm6, kHash16x33
-
-    align      4
-  wloop:
-    vpmovzxbd  xmm3, dword ptr [eax]  // src[0-3]
-    pmulld     xmm0, xmm6  // hash *= 33 ^ 16
-    vpmovzxbd  xmm4, dword ptr [eax + 4]  // src[4-7]
-    pmulld     xmm3, kHashMul0
-    vpmovzxbd  xmm2, dword ptr [eax + 8]  // src[8-11]
-    pmulld     xmm4, kHashMul1
-    vpmovzxbd  xmm1, dword ptr [eax + 12]  // src[12-15]
-    pmulld     xmm2, kHashMul2
-    lea        eax, [eax + 16]
-    pmulld     xmm1, kHashMul3
-    paddd      xmm3, xmm4        // add 16 results
-    paddd      xmm1, xmm2
-    sub        ecx, 16
-    paddd      xmm1, xmm3
-    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
-    paddd      xmm1, xmm2
-    pshufd     xmm2, xmm1, 0x01
-    paddd      xmm1, xmm2
-    paddd      xmm0, xmm1
-    jg         wloop
-
-    movd       eax, xmm0         // return hash
-    ret
-  }
-}
-#endif  // _MSC_VER >= 1700
-
-#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc
deleted file mode 100755
index c8408dc798..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc
+++ /dev/null
@@ -1,1491 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/scale.h"  // For ScalePlane()
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-static __inline int Abs(int v) {
-  return v >= 0 ? v : -v;
-}
-
-// Any I4xx To I420 format with mirroring.
-static int I4xxToI420(const uint8* src_y, int src_stride_y,
-                      const uint8* src_u, int src_stride_u,
-                      const uint8* src_v, int src_stride_v,
-                      uint8* dst_y, int dst_stride_y,
-                      uint8* dst_u, int dst_stride_u,
-                      uint8* dst_v, int dst_stride_v,
-                      int src_y_width, int src_y_height,
-                      int src_uv_width, int src_uv_height) {
-  if (src_y_width == 0 || src_y_height == 0 ||
-      src_uv_width == 0 || src_uv_height == 0) {
-    return -1;
-  }
-  const int dst_y_width = Abs(src_y_width);
-  const int dst_y_height = Abs(src_y_height);
-  const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
-  const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
-  ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
-             dst_y, dst_stride_y, dst_y_width, dst_y_height,
-             kFilterBilinear);
-  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
-             dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
-             kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
-             dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
-             kFilterBilinear);
-  return 0;
-}
-
-// Copy I420 with optional flipping
-// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure
-// is does row coalescing.
-LIBYUV_API
-int I420Copy(const uint8* src_y, int src_stride_y,
-             const uint8* src_u, int src_stride_u,
-             const uint8* src_v, int src_stride_v,
-             uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    const int halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-
-  if (dst_y) {
-    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  }
-  // Copy UV planes.
-  const int halfwidth = (width + 1) >> 1;
-  const int halfheight = (height + 1) >> 1;
-  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
-  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
-  return 0;
-}
-
-// 422 chroma is 1/2 width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I422ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  const int src_uv_width = SUBSAMPLE(width, 1, 1);
-  return I4xxToI420(src_y, src_stride_y,
-                    src_u, src_stride_u,
-                    src_v, src_stride_v,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height,
-                    src_uv_width, height);
-}
-
-// 444 chroma is 1x width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I444ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  return I4xxToI420(src_y, src_stride_y,
-                    src_u, src_stride_u,
-                    src_v, src_stride_v,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height,
-                    width, height);
-}
-
-// 411 chroma is 1/4 width, 1x height
-// 420 chroma is 1/2 width, 1/2 height
-LIBYUV_API
-int I411ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  const int src_uv_width = SUBSAMPLE(width, 3, 2);
-  return I4xxToI420(src_y, src_stride_y,
-                    src_u, src_stride_u,
-                    src_v, src_stride_v,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height,
-                    src_uv_width, height);
-}
-
-// I400 is greyscale typically used in MJPG
-LIBYUV_API
-int I400ToI420(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_y || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
-  SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
-  return 0;
-}
-
-static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
-                       uint8* dst, int dst_stride,
-                       int width, int height) {
-  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_COPYROW_X86)
-  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
-    CopyRow = CopyRow_X86;
-  }
-#endif
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src, 16) &&
-      IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    CopyRow = CopyRow_SSE2;
-  }
-#endif
-#if defined(HAS_COPYROW_ERMS)
-  if (TestCpuFlag(kCpuHasERMS)) {
-    CopyRow = CopyRow_ERMS;
-  }
-#endif
-#if defined(HAS_COPYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
-    CopyRow = CopyRow_NEON;
-  }
-#endif
-#if defined(HAS_COPYROW_MIPS)
-  if (TestCpuFlag(kCpuHasMIPS)) {
-    CopyRow = CopyRow_MIPS;
-  }
-#endif
-
-  // Copy plane
-  for (int y = 0; y < height - 1; y += 2) {
-    CopyRow(src, dst, width);
-    CopyRow(src + src_stride_0, dst + dst_stride, width);
-    src += src_stride_0 + src_stride_1;
-    dst += dst_stride * 2;
-  }
-  if (height & 1) {
-    CopyRow(src, dst, width);
-  }
-}
-
-// Support converting from FOURCC_M420
-// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
-// easy conversion to I420.
-// M420 format description:
-// M420 is row biplanar 420: 2 rows of Y and 1 row of UV.
-// Chroma is half width / half height. (420)
-// src_stride_m420 is row planar. Normally this will be the width in pixels.
-//   The UV plane is half width, but 2 values, so src_stride_m420 applies to
-//   this as well as the two Y planes.
-static int X420ToI420(const uint8* src_y,
-                      int src_stride_y0, int src_stride_y1,
-                      const uint8* src_uv, int src_stride_uv,
-                      uint8* dst_y, int dst_stride_y,
-                      uint8* dst_u, int dst_stride_u,
-                      uint8* dst_v, int dst_stride_v,
-                      int width, int height) {
-  if (!src_y || !src_uv ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    int halfheight = (height + 1) >> 1;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
-    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
-    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
-    dst_stride_y = -dst_stride_y;
-    dst_stride_u = -dst_stride_u;
-    dst_stride_v = -dst_stride_v;
-  }
-  // Coalesce rows.
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if (src_stride_y0 == width &&
-      src_stride_y1 == width &&
-      dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    src_stride_y0 = src_stride_y1 = dst_stride_y = 0;
-  }
-  // Coalesce rows.
-  if (src_stride_uv == halfwidth * 2 &&
-      dst_stride_u == halfwidth &&
-      dst_stride_v == halfwidth) {
-    halfwidth *= halfheight;
-    halfheight = 1;
-    src_stride_uv = dst_stride_u = dst_stride_v = 0;
-  }
-  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
-      SplitUVRow_C;
-#if defined(HAS_SPLITUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    SplitUVRow = SplitUVRow_Any_SSE2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
-          IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
-          IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-        SplitUVRow = SplitUVRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_SPLITUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    SplitUVRow = SplitUVRow_Any_AVX2;
-    if (IS_ALIGNED(halfwidth, 32)) {
-      SplitUVRow = SplitUVRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_SPLITUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    SplitUVRow = SplitUVRow_Any_NEON;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
-    SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
-      if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
-          IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
-          IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
-        SplitUVRow = SplitUVRow_MIPS_DSPR2;
-      }
-    }
-  }
-#endif
-
-  if (dst_y) {
-    if (src_stride_y0 == src_stride_y1) {
-      CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height);
-    } else {
-      CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
-                 width, height);
-    }
-  }
-
-  for (int y = 0; y < halfheight; ++y) {
-    // Copy a row of UV.
-    SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-    src_uv += src_stride_uv;
-  }
-  return 0;
-}
-
-// Convert NV12 to I420.
-LIBYUV_API
-int NV12ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_uv, int src_stride_uv,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  return X420ToI420(src_y, src_stride_y, src_stride_y,
-                    src_uv, src_stride_uv,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height);
-}
-
-// Convert NV21 to I420.  Same as NV12 but u and v pointers swapped.
-LIBYUV_API
-int NV21ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_vu, int src_stride_vu,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  return X420ToI420(src_y, src_stride_y, src_stride_y,
-                    src_vu, src_stride_vu,
-                    dst_y, dst_stride_y,
-                    dst_v, dst_stride_v,
-                    dst_u, dst_stride_u,
-                    width, height);
-}
-
-// Convert M420 to I420.
-LIBYUV_API
-int M420ToI420(const uint8* src_m420, int src_stride_m420,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
-                    src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height);
-}
-
-// Convert Q420 to I420.
-// Format is rows of YY/YUYV
-LIBYUV_API
-int Q420ToI420(const uint8* src_y, int src_stride_y,
-               const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_y || !src_yuy2 ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    int halfheight = (height + 1) >> 1;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
-    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
-    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
-    dst_stride_y = -dst_stride_y;
-    dst_stride_u = -dst_stride_u;
-    dst_stride_v = -dst_stride_v;
-  }
-  // CopyRow for rows of just Y in Q420 copied to Y plane of I420.
-  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_COPYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
-    CopyRow = CopyRow_NEON;
-  }
-#endif
-#if defined(HAS_COPYROW_X86)
-  if (IS_ALIGNED(width, 4)) {
-    CopyRow = CopyRow_X86;
-  }
-#endif
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-    CopyRow = CopyRow_SSE2;
-  }
-#endif
-#if defined(HAS_COPYROW_ERMS)
-  if (TestCpuFlag(kCpuHasERMS)) {
-    CopyRow = CopyRow_ERMS;
-  }
-#endif
-#if defined(HAS_COPYROW_MIPS)
-  if (TestCpuFlag(kCpuHasMIPS)) {
-    CopyRow = CopyRow_MIPS;
-  }
-#endif
-
-  void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
-      int pix) = YUY2ToUV422Row_C;
-  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
-      YUY2ToYRow_C;
-#if defined(HAS_YUY2TOYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
-    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
-      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
-        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          YUY2ToYRow = YUY2ToYRow_SSE2;
-        }
-      }
-    }
-  }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
-    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
-      YUY2ToYRow = YUY2ToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    YUY2ToYRow = YUY2ToYRow_Any_NEON;
-    if (width >= 16) {
-      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
-    }
-    if (IS_ALIGNED(width, 16)) {
-      YUY2ToYRow = YUY2ToYRow_NEON;
-      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    CopyRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-
-    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
-    YUY2ToYRow(src_yuy2, dst_y, width);
-    src_yuy2 += src_stride_yuy2;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    CopyRow(src_y, dst_y, width);
-    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
-  }
-  return 0;
-}
-
-// Convert YUY2 to I420.
-LIBYUV_API
-int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
-    src_stride_yuy2 = -src_stride_yuy2;
-  }
-  void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix);
-  void (*YUY2ToYRow)(const uint8* src_yuy2,
-                     uint8* dst_y, int pix);
-  YUY2ToYRow = YUY2ToYRow_C;
-  YUY2ToUVRow = YUY2ToUVRow_C;
-#if defined(HAS_YUY2TOYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
-    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
-      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
-        YUY2ToUVRow = YUY2ToUVRow_SSE2;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          YUY2ToYRow = YUY2ToYRow_SSE2;
-        }
-      }
-    }
-  }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    YUY2ToUVRow = YUY2ToUVRow_Any_AVX2;
-    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      YUY2ToUVRow = YUY2ToUVRow_AVX2;
-      YUY2ToYRow = YUY2ToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    YUY2ToYRow = YUY2ToYRow_Any_NEON;
-    if (width >= 16) {
-      YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
-    }
-    if (IS_ALIGNED(width, 16)) {
-      YUY2ToYRow = YUY2ToYRow_NEON;
-      YUY2ToUVRow = YUY2ToUVRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width);
-    YUY2ToYRow(src_yuy2, dst_y, width);
-    YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
-    src_yuy2 += src_stride_yuy2 * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width);
-    YUY2ToYRow(src_yuy2, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert UYVY to I420.
-LIBYUV_API
-int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
-    src_stride_uyvy = -src_stride_uyvy;
-  }
-  void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix);
-  void (*UYVYToYRow)(const uint8* src_uyvy,
-                     uint8* dst_y, int pix);
-  UYVYToYRow = UYVYToYRow_C;
-  UYVYToUVRow = UYVYToUVRow_C;
-#if defined(HAS_UYVYTOYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    UYVYToUVRow = UYVYToUVRow_Any_SSE2;
-    UYVYToYRow = UYVYToYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
-      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
-        UYVYToUVRow = UYVYToUVRow_SSE2;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          UYVYToYRow = UYVYToYRow_SSE2;
-        }
-      }
-    }
-  }
-#endif
-#if defined(HAS_UYVYTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    UYVYToUVRow = UYVYToUVRow_Any_AVX2;
-    UYVYToYRow = UYVYToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      UYVYToUVRow = UYVYToUVRow_AVX2;
-      UYVYToYRow = UYVYToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_UYVYTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    UYVYToYRow = UYVYToYRow_Any_NEON;
-    if (width >= 16) {
-      UYVYToUVRow = UYVYToUVRow_Any_NEON;
-    }
-    if (IS_ALIGNED(width, 16)) {
-      UYVYToYRow = UYVYToYRow_NEON;
-      UYVYToUVRow = UYVYToUVRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
-    UYVYToYRow(src_uyvy, dst_y, width);
-    UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width);
-    src_uyvy += src_stride_uyvy * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width);
-    UYVYToYRow(src_uyvy, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert ARGB to I420.
-LIBYUV_API
-int ARGBToI420(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_argb ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUVRow = ARGBToUVRow_SSSE3;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          ARGBToYRow = ARGBToYRow_SSSE3;
-        }
-      }
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
-    ARGBToYRow = ARGBToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToUVRow = ARGBToUVRow_AVX2;
-      ARGBToYRow = ARGBToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUVRow = ARGBToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUVRow = ARGBToUVRow_NEON;
-      }
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
-    ARGBToYRow(src_argb, dst_y, width);
-    ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
-    src_argb += src_stride_argb * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
-    ARGBToYRow(src_argb, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert BGRA to I420.
-LIBYUV_API
-int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_bgra ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
-    src_stride_bgra = -src_stride_bgra;
-  }
-  void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
-                      uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
-  void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
-      BGRAToYRow_C;
-#if defined(HAS_BGRATOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
-    BGRAToYRow = BGRAToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3;
-      BGRAToYRow = BGRAToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) {
-        BGRAToUVRow = BGRAToUVRow_SSSE3;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          BGRAToYRow = BGRAToYRow_SSSE3;
-        }
-      }
-    }
-  }
-#elif defined(HAS_BGRATOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    BGRAToYRow = BGRAToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      BGRAToYRow = BGRAToYRow_NEON;
-    }
-    if (width >= 16) {
-      BGRAToUVRow = BGRAToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        BGRAToUVRow = BGRAToUVRow_NEON;
-      }
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
-    BGRAToYRow(src_bgra, dst_y, width);
-    BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width);
-    src_bgra += src_stride_bgra * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width);
-    BGRAToYRow(src_bgra, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert ABGR to I420.
-LIBYUV_API
-int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_abgr ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_abgr = src_abgr + (height - 1) * src_stride_abgr;
-    src_stride_abgr = -src_stride_abgr;
-  }
-  void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
-                      uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
-  void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
-      ABGRToYRow_C;
-#if defined(HAS_ABGRTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
-    ABGRToYRow = ABGRToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3;
-      ABGRToYRow = ABGRToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) {
-        ABGRToUVRow = ABGRToUVRow_SSSE3;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          ABGRToYRow = ABGRToYRow_SSSE3;
-        }
-      }
-    }
-  }
-#elif defined(HAS_ABGRTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ABGRToYRow = ABGRToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ABGRToYRow = ABGRToYRow_NEON;
-    }
-    if (width >= 16) {
-      ABGRToUVRow = ABGRToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ABGRToUVRow = ABGRToUVRow_NEON;
-      }
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
-    ABGRToYRow(src_abgr, dst_y, width);
-    ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width);
-    src_abgr += src_stride_abgr * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width);
-    ABGRToYRow(src_abgr, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert RGBA to I420.
-LIBYUV_API
-int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_rgba ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_rgba = src_rgba + (height - 1) * src_stride_rgba;
-    src_stride_rgba = -src_stride_rgba;
-  }
-  void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
-                      uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
-  void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
-      RGBAToYRow_C;
-#if defined(HAS_RGBATOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
-    RGBAToYRow = RGBAToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3;
-      RGBAToYRow = RGBAToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) {
-        RGBAToUVRow = RGBAToUVRow_SSSE3;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          RGBAToYRow = RGBAToYRow_SSSE3;
-        }
-      }
-    }
-  }
-#elif defined(HAS_RGBATOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    RGBAToYRow = RGBAToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      RGBAToYRow = RGBAToYRow_NEON;
-    }
-    if (width >= 16) {
-      RGBAToUVRow = RGBAToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        RGBAToUVRow = RGBAToUVRow_NEON;
-      }
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
-    RGBAToYRow(src_rgba, dst_y, width);
-    RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width);
-    src_rgba += src_stride_rgba * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width);
-    RGBAToYRow(src_rgba, dst_y, width);
-  }
-  return 0;
-}
-
-// Convert RGB24 to I420.
-LIBYUV_API
-int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
-                uint8* dst_y, int dst_stride_y,
-                uint8* dst_u, int dst_stride_u,
-                uint8* dst_v, int dst_stride_v,
-                int width, int height) {
-  if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
-    src_stride_rgb24 = -src_stride_rgb24;
-  }
-
-#if defined(HAS_RGB24TOYROW_NEON)
-  void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
-      uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
-  void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
-      RGB24ToYRow_C;
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    RGB24ToYRow = RGB24ToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      RGB24ToYRow = RGB24ToYRow_NEON;
-    }
-    if (width >= 16) {
-      RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        RGB24ToUVRow = RGB24ToUVRow_NEON;
-      }
-    }
-  }
-#else  // HAS_RGB24TOYROW_NEON
-
-  // Allocate 2 rows of ARGB.
-  const int kRowSize = (width * 4 + 15) & ~15;
-  align_buffer_64(row, kRowSize * 2);
-
-  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
-      RGB24ToARGBRow_C;
-#if defined(HAS_RGB24TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
-    }
-  }
-#endif
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_RGB24TOYROW_NEON
-
-  for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB24TOYROW_NEON)
-    RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
-    RGB24ToYRow(src_rgb24, dst_y, width);
-    RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
-#else
-    RGB24ToARGBRow(src_rgb24, row, width);
-    RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
-    ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-    ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
-    src_rgb24 += src_stride_rgb24 * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-#if defined(HAS_RGB24TOYROW_NEON)
-    RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width);
-    RGB24ToYRow(src_rgb24, dst_y, width);
-#else
-    RGB24ToARGBRow(src_rgb24, row, width);
-    ARGBToUVRow(row, 0, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-#endif
-  }
-#if !defined(HAS_RGB24TOYROW_NEON)
-  free_aligned_buffer_64(row);
-#endif
-  return 0;
-}
-
-// Convert RAW to I420.
-LIBYUV_API
-int RAWToI420(const uint8* src_raw, int src_stride_raw,
-              uint8* dst_y, int dst_stride_y,
-              uint8* dst_u, int dst_stride_u,
-              uint8* dst_v, int dst_stride_v,
-              int width, int height) {
-  if (!src_raw || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_raw = src_raw + (height - 1) * src_stride_raw;
-    src_stride_raw = -src_stride_raw;
-  }
-
-#if defined(HAS_RAWTOYROW_NEON)
-  void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
-      uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
-  void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
-      RAWToYRow_C;
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    RAWToYRow = RAWToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      RAWToYRow = RAWToYRow_NEON;
-    }
-    if (width >= 16) {
-      RAWToUVRow = RAWToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        RAWToUVRow = RAWToUVRow_NEON;
-      }
-    }
-  }
-#else  // HAS_RAWTOYROW_NEON
-
-  // Allocate 2 rows of ARGB.
-  const int kRowSize = (width * 4 + 15) & ~15;
-  align_buffer_64(row, kRowSize * 2);
-
-  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
-      RAWToARGBRow_C;
-#if defined(HAS_RAWTOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      RAWToARGBRow = RAWToARGBRow_SSSE3;
-    }
-  }
-#endif
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_RAWTOYROW_NEON
-
-  for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RAWTOYROW_NEON)
-    RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width);
-    RAWToYRow(src_raw, dst_y, width);
-    RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
-#else
-    RAWToARGBRow(src_raw, row, width);
-    RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
-    ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-    ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
-    src_raw += src_stride_raw * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-#if defined(HAS_RAWTOYROW_NEON)
-    RAWToUVRow(src_raw, 0, dst_u, dst_v, width);
-    RAWToYRow(src_raw, dst_y, width);
-#else
-    RAWToARGBRow(src_raw, row, width);
-    ARGBToUVRow(row, 0, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-#endif
-  }
-#if !defined(HAS_RAWTOYROW_NEON)
-  free_aligned_buffer_64(row);
-#endif
-  return 0;
-}
-
-// Convert RGB565 to I420.
-LIBYUV_API
-int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
-                uint8* dst_y, int dst_stride_y,
-                uint8* dst_u, int dst_stride_u,
-                uint8* dst_v, int dst_stride_v,
-                int width, int height) {
-  if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
-    src_stride_rgb565 = -src_stride_rgb565;
-  }
-
-#if defined(HAS_RGB565TOYROW_NEON)
-  void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
-      uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
-  void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
-      RGB565ToYRow_C;
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    RGB565ToYRow = RGB565ToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      RGB565ToYRow = RGB565ToYRow_NEON;
-    }
-    if (width >= 16) {
-      RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        RGB565ToUVRow = RGB565ToUVRow_NEON;
-      }
-    }
-  }
-#else  // HAS_RGB565TOYROW_NEON
-
-  // Allocate 2 rows of ARGB.
-  const int kRowSize = (width * 4 + 15) & ~15;
-  align_buffer_64(row, kRowSize * 2);
-
-  void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
-      RGB565ToARGBRow_C;
-#if defined(HAS_RGB565TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
-    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
-    }
-  }
-#endif
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_RGB565TOYROW_NEON
-
-  for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_RGB565TOYROW_NEON)
-    RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
-    RGB565ToYRow(src_rgb565, dst_y, width);
-    RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
-#else
-    RGB565ToARGBRow(src_rgb565, row, width);
-    RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width);
-    ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-    ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
-    src_rgb565 += src_stride_rgb565 * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-#if defined(HAS_RGB565TOYROW_NEON)
-    RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
-    RGB565ToYRow(src_rgb565, dst_y, width);
-#else
-    RGB565ToARGBRow(src_rgb565, row, width);
-    ARGBToUVRow(row, 0, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-#endif
-  }
-#if !defined(HAS_RGB565TOYROW_NEON)
-  free_aligned_buffer_64(row);
-#endif
-  return 0;
-}
-
-// Convert ARGB1555 to I420.
-LIBYUV_API
-int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
-                   uint8* dst_y, int dst_stride_y,
-                   uint8* dst_u, int dst_stride_u,
-                   uint8* dst_v, int dst_stride_v,
-                   int width, int height) {
-  if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
-    src_stride_argb1555 = -src_stride_argb1555;
-  }
-
-#if defined(HAS_ARGB1555TOYROW_NEON)
-  void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
-      uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
-  void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
-      ARGB1555ToYRow_C;
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB1555ToYRow = ARGB1555ToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
-      }
-    }
-  }
-#else  // HAS_ARGB1555TOYROW_NEON
-
-  // Allocate 2 rows of ARGB.
-  const int kRowSize = (width * 4 + 15) & ~15;
-  align_buffer_64(row, kRowSize * 2);
-
-  void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
-      ARGB1555ToARGBRow_C;
-#if defined(HAS_ARGB1555TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
-    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
-    }
-  }
-#endif
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_ARGB1555TOYROW_NEON
-
-  for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
-    ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
-    ARGB1555ToYRow(src_argb1555, dst_y, width);
-    ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
-                   width);
-#else
-    ARGB1555ToARGBRow(src_argb1555, row, width);
-    ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize,
-                      width);
-    ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-    ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
-    src_argb1555 += src_stride_argb1555 * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-#if defined(HAS_ARGB1555TOYROW_NEON)
-    ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
-    ARGB1555ToYRow(src_argb1555, dst_y, width);
-#else
-    ARGB1555ToARGBRow(src_argb1555, row, width);
-    ARGBToUVRow(row, 0, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-#endif
-  }
-#if !defined(HAS_ARGB1555TOYROW_NEON)
-  free_aligned_buffer_64(row);
-#endif
-  return 0;
-}
-
-// Convert ARGB4444 to I420.
-LIBYUV_API
-int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
-                   uint8* dst_y, int dst_stride_y,
-                   uint8* dst_u, int dst_stride_u,
-                   uint8* dst_v, int dst_stride_v,
-                   int width, int height) {
-  if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
-    src_stride_argb4444 = -src_stride_argb4444;
-  }
-
-#if defined(HAS_ARGB4444TOYROW_NEON)
-  void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
-      uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
-  void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
-      ARGB4444ToYRow_C;
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB4444ToYRow = ARGB4444ToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
-      }
-    }
-  }
-#else  // HAS_ARGB4444TOYROW_NEON
-
-  // Allocate 2 rows of ARGB.
-  const int kRowSize = (width * 4 + 15) & ~15;
-  align_buffer_64(row, kRowSize * 2);
-
-  void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
-      ARGB4444ToARGBRow_C;
-#if defined(HAS_ARGB4444TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
-    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
-    }
-  }
-#endif
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#endif  // HAS_ARGBTOUVROW_SSSE3
-#endif  // HAS_ARGB4444TOYROW_NEON
-
-  for (int y = 0; y < height - 1; y += 2) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
-    ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
-    ARGB4444ToYRow(src_argb4444, dst_y, width);
-    ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
-                   width);
-#else
-    ARGB4444ToARGBRow(src_argb4444, row, width);
-    ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize,
-                      width);
-    ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-    ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-#endif
-    src_argb4444 += src_stride_argb4444 * 2;
-    dst_y += dst_stride_y * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-#if defined(HAS_ARGB4444TOYROW_NEON)
-    ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
-    ARGB4444ToYRow(src_argb4444, dst_y, width);
-#else
-    ARGB4444ToARGBRow(src_argb4444, row, width);
-    ARGBToUVRow(row, 0, dst_u, dst_v, width);
-    ARGBToYRow(row, dst_y, width);
-#endif
-  }
-#if !defined(HAS_ARGB4444TOYROW_NEON)
-  free_aligned_buffer_64(row);
-#endif
-  return 0;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc
deleted file mode 100755
index a8aab91478..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc
+++ /dev/null
@@ -1,901 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_argb.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy ARGB with optional flipping
-LIBYUV_API
-int ARGBCopy(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int width, int height) {
-  if (!src_argb || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-
-  CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
-            width * 4, height);
-  return 0;
-}
-
-// Convert I444 to ARGB.
-LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u == width &&
-      src_stride_v == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
-  }
-  void (*I444ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I444ToARGBRow_C;
-#if defined(HAS_I444TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        I444ToARGBRow = I444ToARGBRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_I444TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I444ToARGBRow = I444ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I444ToARGBRow = I444ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
-// Convert I422 to ARGB.
-LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
-  }
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        I422ToARGBRow = I422ToARGBRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
-// Convert I411 to ARGB.
-LIBYUV_API
-int I411ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 4 == width &&
-      src_stride_v * 4 == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
-  }
-  void (*I411ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I411ToARGBRow_C;
-#if defined(HAS_I411TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        I411ToARGBRow = I411ToARGBRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_I411TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I411ToARGBRow = I411ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I411ToARGBRow = I411ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
-// Convert I400 to ARGB.
-LIBYUV_API
-int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
-                         uint8* dst_argb, int dst_stride_argb,
-                         int width, int height) {
-  if (!src_y || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_argb = 0;
-  }
-  void (*YToARGBRow)(const uint8* y_buf,
-                     uint8* rgb_buf,
-                     int width) = YToARGBRow_C;
-#if defined(HAS_YTOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    YToARGBRow = YToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      YToARGBRow = YToARGBRow_SSE2;
-    }
-  }
-#elif defined(HAS_YTOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    YToARGBRow = YToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      YToARGBRow = YToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    YToARGBRow(src_y, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-  }
-  return 0;
-}
-
-// Convert I400 to ARGB.
-LIBYUV_API
-int I400ToARGB(const uint8* src_y, int src_stride_y,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_y || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_argb = 0;
-  }
-  void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
-      I400ToARGBRow_C;
-#if defined(HAS_I400TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
-    I400ToARGBRow = I400ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        I400ToARGBRow = I400ToARGBRow_SSE2;
-      }
-    }
-  }
-#elif defined(HAS_I400TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I400ToARGBRow = I400ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I400ToARGBRow = I400ToARGBRow_NEON;
-    }
-  }
-#endif
-  for (int y = 0; y < height; ++y) {
-    I400ToARGBRow(src_y, dst_argb, width);
-    src_y += src_stride_y;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Shuffle table for converting BGRA to ARGB.
-static uvec8 kShuffleMaskBGRAToARGB = {
-  3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
-};
-
-// Shuffle table for converting ABGR to ARGB.
-static uvec8 kShuffleMaskABGRToARGB = {
-  2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
-};
-
-// Shuffle table for converting RGBA to ARGB.
-static uvec8 kShuffleMaskRGBAToARGB = {
-  1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
-};
-
-// Convert BGRA to ARGB.
-LIBYUV_API
-int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  return ARGBShuffle(src_bgra, src_stride_bgra,
-                     dst_argb, dst_stride_argb,
-                     (const uint8*)(&kShuffleMaskBGRAToARGB),
-                     width, height);
-}
-
-// Convert ABGR to ARGB.
-LIBYUV_API
-int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  return ARGBShuffle(src_abgr, src_stride_abgr,
-                     dst_argb, dst_stride_argb,
-                     (const uint8*)(&kShuffleMaskABGRToARGB),
-                     width, height);
-}
-
-// Convert RGBA to ARGB.
-LIBYUV_API
-int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  return ARGBShuffle(src_rgba, src_stride_rgba,
-                     dst_argb, dst_stride_argb,
-                     (const uint8*)(&kShuffleMaskRGBAToARGB),
-                     width, height);
-}
-
-// Convert RGB24 to ARGB.
-LIBYUV_API
-int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
-                uint8* dst_argb, int dst_stride_argb,
-                int width, int height) {
-  if (!src_rgb24 || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
-    src_stride_rgb24 = -src_stride_rgb24;
-  }
-  // Coalesce rows.
-  if (src_stride_rgb24 == width * 3 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_rgb24 = dst_stride_argb = 0;
-  }
-  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
-      RGB24ToARGBRow_C;
-#if defined(HAS_RGB24TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
-    }
-  }
-#elif defined(HAS_RGB24TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      RGB24ToARGBRow = RGB24ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    RGB24ToARGBRow(src_rgb24, dst_argb, width);
-    src_rgb24 += src_stride_rgb24;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert RAW to ARGB.
-LIBYUV_API
-int RAWToARGB(const uint8* src_raw, int src_stride_raw,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height) {
-  if (!src_raw || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_raw = src_raw + (height - 1) * src_stride_raw;
-    src_stride_raw = -src_stride_raw;
-  }
-  // Coalesce rows.
-  if (src_stride_raw == width * 3 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_raw = dst_stride_argb = 0;
-  }
-  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
-      RAWToARGBRow_C;
-#if defined(HAS_RAWTOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      RAWToARGBRow = RAWToARGBRow_SSSE3;
-    }
-  }
-#elif defined(HAS_RAWTOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    RAWToARGBRow = RAWToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      RAWToARGBRow = RAWToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    RAWToARGBRow(src_raw, dst_argb, width);
-    src_raw += src_stride_raw;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert RGB565 to ARGB.
-LIBYUV_API
-int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height) {
-  if (!src_rgb565 || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
-    src_stride_rgb565 = -src_stride_rgb565;
-  }
-  // Coalesce rows.
-  if (src_stride_rgb565 == width * 2 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_rgb565 = dst_stride_argb = 0;
-  }
-  void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
-      RGB565ToARGBRow_C;
-#if defined(HAS_RGB565TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
-    }
-  }
-#elif defined(HAS_RGB565TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      RGB565ToARGBRow = RGB565ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    RGB565ToARGBRow(src_rgb565, dst_argb, width);
-    src_rgb565 += src_stride_rgb565;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert ARGB1555 to ARGB.
-LIBYUV_API
-int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
-                   uint8* dst_argb, int dst_stride_argb,
-                   int width, int height) {
-  if (!src_argb1555 || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
-    src_stride_argb1555 = -src_stride_argb1555;
-  }
-  // Coalesce rows.
-  if (src_stride_argb1555 == width * 2 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb1555 = dst_stride_argb = 0;
-  }
-  void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
-                            int pix) = ARGB1555ToARGBRow_C;
-#if defined(HAS_ARGB1555TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
-    }
-  }
-#elif defined(HAS_ARGB1555TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
-    src_argb1555 += src_stride_argb1555;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert ARGB4444 to ARGB.
-LIBYUV_API
-int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
-                   uint8* dst_argb, int dst_stride_argb,
-                   int width, int height) {
-  if (!src_argb4444 || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
-    src_stride_argb4444 = -src_stride_argb4444;
-  }
-  // Coalesce rows.
-  if (src_stride_argb4444 == width * 2 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb4444 = dst_stride_argb = 0;
-  }
-  void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
-                            int pix) = ARGB4444ToARGBRow_C;
-#if defined(HAS_ARGB4444TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
-    }
-  }
-#elif defined(HAS_ARGB4444TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
-    src_argb4444 += src_stride_argb4444;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert NV12 to ARGB.
-LIBYUV_API
-int NV12ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_uv, int src_stride_uv,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_y || !src_uv || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  void (*NV12ToARGBRow)(const uint8* y_buf,
-                        const uint8* uv_buf,
-                        uint8* rgb_buf,
-                        int width) = NV12ToARGBRow_C;
-#if defined(HAS_NV12TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_NV12TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      NV12ToARGBRow = NV12ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    NV12ToARGBRow(src_y, src_uv, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_uv += src_stride_uv;
-    }
-  }
-  return 0;
-}
-
-// Convert NV21 to ARGB.
-LIBYUV_API
-int NV21ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_uv, int src_stride_uv,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_y || !src_uv || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  void (*NV21ToARGBRow)(const uint8* y_buf,
-                        const uint8* uv_buf,
-                        uint8* rgb_buf,
-                        int width) = NV21ToARGBRow_C;
-#if defined(HAS_NV21TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        NV21ToARGBRow = NV21ToARGBRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_NV21TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      NV21ToARGBRow = NV21ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    NV21ToARGBRow(src_y, src_uv, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_uv += src_stride_uv;
-    }
-  }
-  return 0;
-}
-
-// Convert M420 to ARGB.
-LIBYUV_API
-int M420ToARGB(const uint8* src_m420, int src_stride_m420,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_m420 || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  void (*NV12ToARGBRow)(const uint8* y_buf,
-                        const uint8* uv_buf,
-                        uint8* rgb_buf,
-                        int width) = NV12ToARGBRow_C;
-#if defined(HAS_NV12TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_NV12TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      NV12ToARGBRow = NV12ToARGBRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
-    NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
-                  dst_argb + dst_stride_argb, width);
-    dst_argb += dst_stride_argb * 2;
-    src_m420 += src_stride_m420 * 3;
-  }
-  if (height & 1) {
-    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
-  }
-  return 0;
-}
-
-// Convert YUY2 to ARGB.
-LIBYUV_API
-int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_yuy2 || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
-    src_stride_yuy2 = -src_stride_yuy2;
-  }
-  // Coalesce rows.
-  if (src_stride_yuy2 == width * 2 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_yuy2 = dst_stride_argb = 0;
-  }
-  void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
-      YUY2ToARGBRow_C;
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
-  // Posix is 16, Windows is 8.
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
-          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_YUY2TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      YUY2ToARGBRow = YUY2ToARGBRow_NEON;
-    }
-  }
-#endif
-  for (int y = 0; y < height; ++y) {
-    YUY2ToARGBRow(src_yuy2, dst_argb, width);
-    src_yuy2 += src_stride_yuy2;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert UYVY to ARGB.
-LIBYUV_API
-int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_uyvy || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
-    src_stride_uyvy = -src_stride_uyvy;
-  }
-  // Coalesce rows.
-  if (src_stride_uyvy == width * 2 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_uyvy = dst_stride_argb = 0;
-  }
-  void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
-      UYVYToARGBRow_C;
-#if defined(HAS_UYVYTOARGBROW_SSSE3)
-  // Posix is 16, Windows is 8.
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
-          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        UYVYToARGBRow = UYVYToARGBRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_UYVYTOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      UYVYToARGBRow = UYVYToARGBRow_NEON;
-    }
-  }
-#endif
-  for (int y = 0; y < height; ++y) {
-    UYVYToARGBRow(src_uyvy, dst_argb, width);
-    src_uyvy += src_stride_uyvy;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc
deleted file mode 100755
index 1e10832856..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc
+++ /dev/null
@@ -1,1196 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_from.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/convert.h"  // For I420Copy
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
-#include "libyuv/scale.h"  // For ScalePlane()
-#include "libyuv/video_common.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-static __inline int Abs(int v) {
-  return v >= 0 ? v : -v;
-}
-
-// I420 To any I4xx YUV format with mirroring.
-static int I420ToI4xx(const uint8* src_y, int src_stride_y,
-                      const uint8* src_u, int src_stride_u,
-                      const uint8* src_v, int src_stride_v,
-                      uint8* dst_y, int dst_stride_y,
-                      uint8* dst_u, int dst_stride_u,
-                      uint8* dst_v, int dst_stride_v,
-                      int src_y_width, int src_y_height,
-                      int dst_uv_width, int dst_uv_height) {
-  if (src_y_width == 0 || src_y_height == 0 ||
-      dst_uv_width <= 0 || dst_uv_height <= 0) {
-    return -1;
-  }
-  const int dst_y_width = Abs(src_y_width);
-  const int dst_y_height = Abs(src_y_height);
-  const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
-  const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
-  ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
-             dst_y, dst_stride_y, dst_y_width, dst_y_height,
-             kFilterBilinear);
-  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
-             dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
-             kFilterBilinear);
-  ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
-             dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
-             kFilterBilinear);
-  return 0;
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 422 chroma is 1/2 width, 1x height
-LIBYUV_API
-int I420ToI422(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  const int dst_uv_width = (Abs(width) + 1) >> 1;
-  const int dst_uv_height = Abs(height);
-  return I420ToI4xx(src_y, src_stride_y,
-                    src_u, src_stride_u,
-                    src_v, src_stride_v,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height,
-                    dst_uv_width, dst_uv_height);
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 444 chroma is 1x width, 1x height
-LIBYUV_API
-int I420ToI444(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  const int dst_uv_width = Abs(width);
-  const int dst_uv_height = Abs(height);
-  return I420ToI4xx(src_y, src_stride_y,
-                    src_u, src_stride_u,
-                    src_v, src_stride_v,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height,
-                    dst_uv_width, dst_uv_height);
-}
-
-// 420 chroma is 1/2 width, 1/2 height
-// 411 chroma is 1/4 width, 1x height
-LIBYUV_API
-int I420ToI411(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  const int dst_uv_width = (Abs(width) + 3) >> 2;
-  const int dst_uv_height = Abs(height);
-  return I420ToI4xx(src_y, src_stride_y,
-                    src_u, src_stride_u,
-                    src_v, src_stride_v,
-                    dst_y, dst_stride_y,
-                    dst_u, dst_stride_u,
-                    dst_v, dst_stride_v,
-                    width, height,
-                    dst_uv_width, dst_uv_height);
-}
-
-// Copy to I400. Source can be I420,422,444,400,NV12,NV21
-LIBYUV_API
-int I400Copy(const uint8* src_y, int src_stride_y,
-             uint8* dst_y, int dst_stride_y,
-             int width, int height) {
-  if (!src_y || !dst_y ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  return 0;
-}
-
-LIBYUV_API
-int I422ToYUY2(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_yuy2, int dst_stride_yuy2,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_yuy2 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
-    dst_stride_yuy2 = -dst_stride_yuy2;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_yuy2 == width * 2) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
-  }
-  void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
-                        const uint8* src_v, uint8* dst_yuy2, int width) =
-      I422ToYUY2Row_C;
-#if defined(HAS_I422TOYUY2ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToYUY2Row = I422ToYUY2Row_SSE2;
-    }
-  }
-#elif defined(HAS_I422TOYUY2ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
-    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToYUY2Row = I422ToYUY2Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_yuy2 += dst_stride_yuy2;
-  }
-  return 0;
-}
-
-LIBYUV_API
-int I420ToYUY2(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_yuy2, int dst_stride_yuy2,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_yuy2 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
-    dst_stride_yuy2 = -dst_stride_yuy2;
-  }
-  void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
-                        const uint8* src_v, uint8* dst_yuy2, int width) =
-      I422ToYUY2Row_C;
-#if defined(HAS_I422TOYUY2ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToYUY2Row = I422ToYUY2Row_SSE2;
-    }
-  }
-#elif defined(HAS_I422TOYUY2ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
-    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToYUY2Row = I422ToYUY2Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
-    I422ToYUY2Row(src_y + src_stride_y, src_u, src_v,
-                  dst_yuy2 + dst_stride_yuy2, width);
-    src_y += src_stride_y * 2;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_yuy2 += dst_stride_yuy2 * 2;
-  }
-  if (height & 1) {
-    I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
-  }
-  return 0;
-}
-
-LIBYUV_API
-int I422ToUYVY(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_uyvy, int dst_stride_uyvy,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_uyvy ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
-    dst_stride_uyvy = -dst_stride_uyvy;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_uyvy == width * 2) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
-  }
-  void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
-                        const uint8* src_v, uint8* dst_uyvy, int width) =
-      I422ToUYVYRow_C;
-#if defined(HAS_I422TOUYVYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToUYVYRow = I422ToUYVYRow_SSE2;
-    }
-  }
-#elif defined(HAS_I422TOUYVYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
-    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToUYVYRow = I422ToUYVYRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_uyvy += dst_stride_uyvy;
-  }
-  return 0;
-}
-
-LIBYUV_API
-int I420ToUYVY(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_uyvy, int dst_stride_uyvy,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_uyvy ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
-    dst_stride_uyvy = -dst_stride_uyvy;
-  }
-  void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
-                        const uint8* src_v, uint8* dst_uyvy, int width) =
-      I422ToUYVYRow_C;
-#if defined(HAS_I422TOUYVYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToUYVYRow = I422ToUYVYRow_SSE2;
-    }
-  }
-#elif defined(HAS_I422TOUYVYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
-    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToUYVYRow = I422ToUYVYRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
-    I422ToUYVYRow(src_y + src_stride_y, src_u, src_v,
-                  dst_uyvy + dst_stride_uyvy, width);
-    src_y += src_stride_y * 2;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_uyvy += dst_stride_uyvy * 2;
-  }
-  if (height & 1) {
-    I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
-  }
-  return 0;
-}
-
-LIBYUV_API
-int I420ToNV12(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_uv, int dst_stride_uv,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_y || !dst_uv ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    int halfheight = (height + 1) >> 1;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
-    dst_uv = dst_uv + (halfheight - 1) * dst_stride_uv;
-    dst_stride_y = -dst_stride_y;
-    dst_stride_uv = -dst_stride_uv;
-  }
-  // Coalesce rows.
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if (src_stride_y == width &&
-      dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_y = 0;
-  }
-  // Coalesce rows.
-  if (src_stride_u == halfwidth &&
-      src_stride_v == halfwidth &&
-      dst_stride_uv == halfwidth * 2) {
-    halfwidth *= halfheight;
-    halfheight = 1;
-    src_stride_u = src_stride_v = dst_stride_uv = 0;
-  }
-  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                      int width) = MergeUVRow_C;
-#if defined(HAS_MERGEUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    MergeUVRow_ = MergeUVRow_Any_SSE2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
-          IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
-          IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUVRow_ = MergeUVRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    MergeUVRow_ = MergeUVRow_Any_AVX2;
-    if (IS_ALIGNED(halfwidth, 32)) {
-      MergeUVRow_ = MergeUVRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    MergeUVRow_ = MergeUVRow_Any_NEON;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_NEON;
-    }
-  }
-#endif
-
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  for (int y = 0; y < halfheight; ++y) {
-    // Merge a row of U and V into a row of UV.
-    MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_uv += dst_stride_uv;
-  }
-  return 0;
-}
-
-LIBYUV_API
-int I420ToNV21(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_vu, int dst_stride_vu,
-               int width, int height) {
-  return I420ToNV12(src_y, src_stride_y,
-                    src_v, src_stride_v,
-                    src_u, src_stride_u,
-                    dst_y, src_stride_y,
-                    dst_vu, dst_stride_vu,
-                    width, height);
-}
-
-// Convert I420 to ARGB.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        I422ToARGBRow = I422ToARGBRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to BGRA.
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_bgra, int dst_stride_bgra,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_bgra ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
-    dst_stride_bgra = -dst_stride_bgra;
-  }
-  void (*I422ToBGRARow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToBGRARow_C;
-#if defined(HAS_I422TOBGRAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
-        I422ToBGRARow = I422ToBGRARow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_I422TOBGRAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToBGRARow = I422ToBGRARow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToBGRARow = I422ToBGRARow_NEON;
-    }
-  }
-#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
-    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
-    dst_bgra += dst_stride_bgra;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to ABGR.
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_abgr, int dst_stride_abgr,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_abgr ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
-    dst_stride_abgr = -dst_stride_abgr;
-  }
-  void (*I422ToABGRRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToABGRRow_C;
-#if defined(HAS_I422TOABGRROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
-        I422ToABGRRow = I422ToABGRRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_I422TOABGRROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToABGRRow = I422ToABGRRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToABGRRow = I422ToABGRRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
-    dst_abgr += dst_stride_abgr;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RGBA.
-LIBYUV_API
-int I420ToRGBA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_rgba ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
-    dst_stride_rgba = -dst_stride_rgba;
-  }
-  void (*I422ToRGBARow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToRGBARow_C;
-#if defined(HAS_I422TORGBAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
-        I422ToRGBARow = I422ToRGBARow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_I422TORGBAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToRGBARow = I422ToRGBARow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRGBARow = I422ToRGBARow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
-    dst_rgba += dst_stride_rgba;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RGB24.
-LIBYUV_API
-int I420ToRGB24(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_rgb24, int dst_stride_rgb24,
-                int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_rgb24 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
-    dst_stride_rgb24 = -dst_stride_rgb24;
-  }
-  void (*I422ToRGB24Row)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToRGB24Row_C;
-#if defined(HAS_I422TORGB24ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRGB24Row = I422ToRGB24Row_SSSE3;
-    }
-  }
-#elif defined(HAS_I422TORGB24ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRGB24Row = I422ToRGB24Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
-    dst_rgb24 += dst_stride_rgb24;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RAW.
-LIBYUV_API
-int I420ToRAW(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_raw, int dst_stride_raw,
-                int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_raw ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_raw = dst_raw + (height - 1) * dst_stride_raw;
-    dst_stride_raw = -dst_stride_raw;
-  }
-  void (*I422ToRAWRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToRAWRow_C;
-#if defined(HAS_I422TORAWROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRAWRow = I422ToRAWRow_SSSE3;
-    }
-  }
-#elif defined(HAS_I422TORAWROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToRAWRow = I422ToRAWRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRAWRow = I422ToRAWRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
-    dst_raw += dst_stride_raw;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to ARGB1555.
-LIBYUV_API
-int I420ToARGB1555(const uint8* src_y, int src_stride_y,
-                   const uint8* src_u, int src_stride_u,
-                   const uint8* src_v, int src_stride_v,
-                   uint8* dst_argb1555, int dst_stride_argb1555,
-                   int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_argb1555 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555;
-    dst_stride_argb1555 = -dst_stride_argb1555;
-  }
-  void (*I422ToARGB1555Row)(const uint8* y_buf,
-                          const uint8* u_buf,
-                          const uint8* v_buf,
-                          uint8* rgb_buf,
-                          int width) = I422ToARGB1555Row_C;
-#if defined(HAS_I422TOARGB1555ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGB1555Row = I422ToARGB1555Row_SSSE3;
-    }
-  }
-#elif defined(HAS_I422TOARGB1555ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGB1555Row = I422ToARGB1555Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
-    dst_argb1555 += dst_stride_argb1555;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-
-// Convert I420 to ARGB4444.
-LIBYUV_API
-int I420ToARGB4444(const uint8* src_y, int src_stride_y,
-                   const uint8* src_u, int src_stride_u,
-                   const uint8* src_v, int src_stride_v,
-                   uint8* dst_argb4444, int dst_stride_argb4444,
-                   int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_argb4444 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444;
-    dst_stride_argb4444 = -dst_stride_argb4444;
-  }
-  void (*I422ToARGB4444Row)(const uint8* y_buf,
-                          const uint8* u_buf,
-                          const uint8* v_buf,
-                          uint8* rgb_buf,
-                          int width) = I422ToARGB4444Row_C;
-#if defined(HAS_I422TOARGB4444ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGB4444Row = I422ToARGB4444Row_SSSE3;
-    }
-  }
-#elif defined(HAS_I422TOARGB4444ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGB4444Row = I422ToARGB4444Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
-    dst_argb4444 += dst_stride_argb4444;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RGB565.
-LIBYUV_API
-int I420ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_u, int src_stride_u,
-                 const uint8* src_v, int src_stride_v,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height) {
-  if (!src_y || !src_u || !src_v || !dst_rgb565 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
-    dst_stride_rgb565 = -dst_stride_rgb565;
-  }
-  void (*I422ToRGB565Row)(const uint8* y_buf,
-                          const uint8* u_buf,
-                          const uint8* v_buf,
-                          uint8* rgb_buf,
-                          int width) = I422ToRGB565Row_C;
-#if defined(HAS_I422TORGB565ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRGB565Row = I422ToRGB565Row_SSSE3;
-    }
-  }
-#elif defined(HAS_I422TORGB565ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToRGB565Row = I422ToRGB565Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRGB565Row = I422ToRGB565Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
-    dst_rgb565 += dst_stride_rgb565;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to specified format
-LIBYUV_API
-int ConvertFromI420(const uint8* y, int y_stride,
-                    const uint8* u, int u_stride,
-                    const uint8* v, int v_stride,
-                    uint8* dst_sample, int dst_sample_stride,
-                    int width, int height,
-                    uint32 fourcc) {
-  uint32 format = CanonicalFourCC(fourcc);
-  if (!y || !u|| !v || !dst_sample ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  int r = 0;
-  switch (format) {
-    // Single plane formats
-    case FOURCC_YUY2:
-      r = I420ToYUY2(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 2,
-                     width, height);
-      break;
-    case FOURCC_UYVY:
-      r = I420ToUYVY(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 2,
-                     width, height);
-      break;
-    case FOURCC_RGBP:
-      r = I420ToRGB565(y, y_stride,
-                       u, u_stride,
-                       v, v_stride,
-                       dst_sample,
-                       dst_sample_stride ? dst_sample_stride : width * 2,
-                       width, height);
-      break;
-    case FOURCC_RGBO:
-      r = I420ToARGB1555(y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         dst_sample,
-                         dst_sample_stride ? dst_sample_stride : width * 2,
-                         width, height);
-      break;
-    case FOURCC_R444:
-      r = I420ToARGB4444(y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         dst_sample,
-                         dst_sample_stride ? dst_sample_stride : width * 2,
-                         width, height);
-      break;
-    case FOURCC_24BG:
-      r = I420ToRGB24(y, y_stride,
-                      u, u_stride,
-                      v, v_stride,
-                      dst_sample,
-                      dst_sample_stride ? dst_sample_stride : width * 3,
-                      width, height);
-      break;
-    case FOURCC_RAW:
-      r = I420ToRAW(y, y_stride,
-                    u, u_stride,
-                    v, v_stride,
-                    dst_sample,
-                    dst_sample_stride ? dst_sample_stride : width * 3,
-                    width, height);
-      break;
-    case FOURCC_ARGB:
-      r = I420ToARGB(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_BGRA:
-      r = I420ToBGRA(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_ABGR:
-      r = I420ToABGR(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_RGBA:
-      r = I420ToRGBA(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width * 4,
-                     width, height);
-      break;
-    case FOURCC_BGGR:
-      r = I420ToBayerBGGR(y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          dst_sample,
-                          dst_sample_stride ? dst_sample_stride : width,
-                          width, height);
-      break;
-    case FOURCC_GBRG:
-      r = I420ToBayerGBRG(y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          dst_sample,
-                          dst_sample_stride ? dst_sample_stride : width,
-                          width, height);
-      break;
-    case FOURCC_GRBG:
-      r = I420ToBayerGRBG(y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          dst_sample,
-                          dst_sample_stride ? dst_sample_stride : width,
-                          width, height);
-      break;
-    case FOURCC_RGGB:
-      r = I420ToBayerRGGB(y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          dst_sample,
-                          dst_sample_stride ? dst_sample_stride : width,
-                          width, height);
-      break;
-    case FOURCC_I400:
-      r = I400Copy(y, y_stride,
-                   dst_sample,
-                   dst_sample_stride ? dst_sample_stride : width,
-                   width, height);
-      break;
-    case FOURCC_NV12: {
-      uint8* dst_uv = dst_sample + width * height;
-      r = I420ToNV12(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     dst_uv,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     width, height);
-      break;
-    }
-    case FOURCC_NV21: {
-      uint8* dst_vu = dst_sample + width * height;
-      r = I420ToNV21(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     dst_vu,
-                     dst_sample_stride ? dst_sample_stride : width,
-                     width, height);
-      break;
-    }
-    // TODO(fbarchard): Add M420 and Q420.
-    // Triplanar formats
-    // TODO(fbarchard): halfstride instead of halfwidth
-    case FOURCC_I420:
-    case FOURCC_YU12:
-    case FOURCC_YV12: {
-      int halfwidth = (width + 1) / 2;
-      int halfheight = (height + 1) / 2;
-      uint8* dst_u;
-      uint8* dst_v;
-      if (format == FOURCC_YV12) {
-        dst_v = dst_sample + width * height;
-        dst_u = dst_v + halfwidth * halfheight;
-      } else {
-        dst_u = dst_sample + width * height;
-        dst_v = dst_u + halfwidth * halfheight;
-      }
-      r = I420Copy(y, y_stride,
-                   u, u_stride,
-                   v, v_stride,
-                   dst_sample, width,
-                   dst_u, halfwidth,
-                   dst_v, halfwidth,
-                   width, height);
-      break;
-    }
-    case FOURCC_I422:
-    case FOURCC_YV16: {
-      int halfwidth = (width + 1) / 2;
-      uint8* dst_u;
-      uint8* dst_v;
-      if (format == FOURCC_YV16) {
-        dst_v = dst_sample + width * height;
-        dst_u = dst_v + halfwidth * height;
-      } else {
-        dst_u = dst_sample + width * height;
-        dst_v = dst_u + halfwidth * height;
-      }
-      r = I420ToI422(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, halfwidth,
-                     dst_v, halfwidth,
-                     width, height);
-      break;
-    }
-    case FOURCC_I444:
-    case FOURCC_YV24: {
-      uint8* dst_u;
-      uint8* dst_v;
-      if (format == FOURCC_YV24) {
-        dst_v = dst_sample + width * height;
-        dst_u = dst_v + width * height;
-      } else {
-        dst_u = dst_sample + width * height;
-        dst_v = dst_u + width * height;
-      }
-      r = I420ToI444(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, width,
-                     dst_v, width,
-                     width, height);
-      break;
-    }
-    case FOURCC_I411: {
-      int quarterwidth = (width + 3) / 4;
-      uint8* dst_u = dst_sample + width * height;
-      uint8* dst_v = dst_u + quarterwidth * height;
-      r = I420ToI411(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     dst_sample, width,
-                     dst_u, quarterwidth,
-                     dst_v, quarterwidth,
-                     width, height);
-      break;
-    }
-
-    // Formats not supported - MJPG, biplanar, some rgb formats.
-    default:
-      return -1;  // unknown fourcc - return failure code.
-  }
-  return r;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc
deleted file mode 100755
index 41421fb30b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_from_argb.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// ARGB little endian (bgra in memory) to I444
-LIBYUV_API
-int ARGBToI444(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_y == width &&
-      dst_stride_u == width &&
-      dst_stride_v == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-  void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) = ARGBToUV444Row_C;
-#if defined(HAS_ARGBTOUV444ROW_SSSE3)
-    if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-      ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
-        if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-          ARGBToUV444Row = ARGBToUV444Row_SSSE3;
-        }
-      }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-
-#elif defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-      ARGBToUV444Row = ARGBToUV444Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToUV444Row(src_argb, dst_u, dst_v, width);
-    ARGBToYRow(src_argb, dst_y, width);
-    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-// ARGB little endian (bgra in memory) to I422
-LIBYUV_API
-int ARGBToI422(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_y == width &&
-      dst_stride_u * 2 == width &&
-      dst_stride_v * 2 == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) = ARGBToUV422Row_C;
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUV422Row = ARGBToUV422Row_SSSE3;
-      }
-    }
-  }
-#endif
-
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUV422Row = ARGBToUV422Row_NEON;
-      }
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToUV422Row(src_argb, dst_u, dst_v, width);
-    ARGBToYRow(src_argb, dst_y, width);
-    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-// ARGB little endian (bgra in memory) to I411
-LIBYUV_API
-int ARGBToI411(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_y == width &&
-      dst_stride_u * 4 == width &&
-      dst_stride_v * 4 == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-  void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) = ARGBToUV411Row_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    ARGBToYRow = ARGBToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToYRow = ARGBToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 32) {
-      ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
-      if (IS_ALIGNED(width, 32)) {
-        ARGBToUV411Row = ARGBToUV411Row_NEON;
-      }
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToUV411Row(src_argb, dst_u, dst_v, width);
-    ARGBToYRow(src_argb, dst_y, width);
-    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-LIBYUV_API
-int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_uv, int dst_stride_uv,
-               int width, int height) {
-  if (!src_argb ||
-      !dst_y || !dst_uv ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUVRow = ARGBToUVRow_SSSE3;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          ARGBToYRow = ARGBToYRow_SSSE3;
-        }
-      }
-    }
-  }
-#elif defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUVRow = ARGBToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUVRow = ARGBToUVRow_NEON;
-      }
-    }
-  }
-#endif
-  int halfwidth = (width + 1) >> 1;
-  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                      int width) = MergeUVRow_C;
-#if defined(HAS_MERGEUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    MergeUVRow_ = MergeUVRow_Any_SSE2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
-      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUVRow_ = MergeUVRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    MergeUVRow_ = MergeUVRow_Any_AVX2;
-    if (IS_ALIGNED(halfwidth, 32)) {
-      MergeUVRow_ = MergeUVRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    MergeUVRow_ = MergeUVRow_Any_NEON;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_NEON;
-    }
-  }
-#endif
-
-  // Allocate a rows of uv.
-  align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
-  uint8* row_v = row_u + ((halfwidth + 15) & ~15);
-
-  for (int y = 0; y < height - 1; y += 2) {
-    ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
-    MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
-    ARGBToYRow(src_argb, dst_y, width);
-    ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
-    src_argb += src_stride_argb * 2;
-    dst_y += dst_stride_y * 2;
-    dst_uv += dst_stride_uv;
-  }
-  if (height & 1) {
-    ARGBToUVRow(src_argb, 0, row_u, row_v, width);
-    MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
-    ARGBToYRow(src_argb, dst_y, width);
-  }
-  free_aligned_buffer_64(row_u);
-  return 0;
-}
-
-// Same as NV12 but U and V swapped.
-LIBYUV_API
-int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_uv, int dst_stride_uv,
-               int width, int height) {
-  if (!src_argb ||
-      !dst_y || !dst_uv ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUVRow = ARGBToUVRow_SSSE3;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          ARGBToYRow = ARGBToYRow_SSSE3;
-        }
-      }
-    }
-  }
-#elif defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUVRow = ARGBToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUVRow = ARGBToUVRow_NEON;
-      }
-    }
-  }
-#endif
-  int halfwidth = (width + 1) >> 1;
-  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                      int width) = MergeUVRow_C;
-#if defined(HAS_MERGEUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    MergeUVRow_ = MergeUVRow_Any_SSE2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
-      if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUVRow_ = MergeUVRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    MergeUVRow_ = MergeUVRow_Any_AVX2;
-    if (IS_ALIGNED(halfwidth, 32)) {
-      MergeUVRow_ = MergeUVRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    MergeUVRow_ = MergeUVRow_Any_NEON;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_NEON;
-    }
-  }
-#endif
-
-  // Allocate a rows of uv.
-  align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2);
-  uint8* row_v = row_u + ((halfwidth + 15) & ~15);
-
-  for (int y = 0; y < height - 1; y += 2) {
-    ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
-    MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
-    ARGBToYRow(src_argb, dst_y, width);
-    ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
-    src_argb += src_stride_argb * 2;
-    dst_y += dst_stride_y * 2;
-    dst_uv += dst_stride_uv;
-  }
-  if (height & 1) {
-    ARGBToUVRow(src_argb, 0, row_u, row_v, width);
-    MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
-    ARGBToYRow(src_argb, dst_y, width);
-  }
-  free_aligned_buffer_64(row_u);
-  return 0;
-}
-
-// Convert ARGB to YUY2.
-LIBYUV_API
-int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_yuy2, int dst_stride_yuy2,
-               int width, int height) {
-  if (!src_argb || !dst_yuy2 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
-    dst_stride_yuy2 = -dst_stride_yuy2;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_yuy2 == width * 2) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_yuy2 = 0;
-  }
-  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) = ARGBToUV422Row_C;
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUV422Row = ARGBToUV422Row_SSSE3;
-      }
-    }
-  }
-#endif
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUV422Row = ARGBToUV422Row_NEON;
-      }
-    }
-  }
-#endif
-
-  void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
-                        const uint8* src_v, uint8* dst_yuy2, int width) =
-      I422ToYUY2Row_C;
-#if defined(HAS_I422TOYUY2ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToYUY2Row = I422ToYUY2Row_SSE2;
-    }
-  }
-#elif defined(HAS_I422TOYUY2ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
-    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToYUY2Row = I422ToYUY2Row_NEON;
-    }
-  }
-#endif
-
-  // Allocate a rows of yuv.
-  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
-  uint8* row_u = row_y + ((width + 63) & ~63);
-  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToUV422Row(src_argb, row_u, row_v, width);
-    ARGBToYRow(src_argb, row_y, width);
-    I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
-    src_argb += src_stride_argb;
-    dst_yuy2 += dst_stride_yuy2;
-  }
-
-  free_aligned_buffer_64(row_y);
-  return 0;
-}
-
-// Convert ARGB to UYVY.
-LIBYUV_API
-int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_uyvy, int dst_stride_uyvy,
-               int width, int height) {
-  if (!src_argb || !dst_uyvy ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
-    dst_stride_uyvy = -dst_stride_uyvy;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_uyvy == width * 2) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_uyvy = 0;
-  }
-  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) = ARGBToUV422Row_C;
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUV422Row = ARGBToUV422Row_SSSE3;
-      }
-    }
-  }
-#endif
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUV422Row = ARGBToUV422Row_NEON;
-      }
-    }
-  }
-#endif
-
-  void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
-                        const uint8* src_v, uint8* dst_uyvy, int width) =
-      I422ToUYVYRow_C;
-#if defined(HAS_I422TOUYVYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToUYVYRow = I422ToUYVYRow_SSE2;
-    }
-  }
-#elif defined(HAS_I422TOUYVYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
-    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToUYVYRow = I422ToUYVYRow_NEON;
-    }
-  }
-#endif
-
-  // Allocate a rows of yuv.
-  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
-  uint8* row_u = row_y + ((width + 63) & ~63);
-  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToUV422Row(src_argb, row_u, row_v, width);
-    ARGBToYRow(src_argb, row_y, width);
-    I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
-    src_argb += src_stride_argb;
-    dst_uyvy += dst_stride_uyvy;
-  }
-
-  free_aligned_buffer_64(row_y);
-  return 0;
-}
-
-// Convert ARGB to I400.
-LIBYUV_API
-int ARGBToI400(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height) {
-  if (!src_argb || !dst_y || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_y = 0;
-  }
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-#if defined(HAS_ARGBTOYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-          IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    ARGBToYRow = ARGBToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToYRow = ARGBToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToYRow(src_argb, dst_y, width);
-    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
-  }
-  return 0;
-}
-
-// Shuffle table for converting ARGB to RGBA.
-static uvec8 kShuffleMaskARGBToRGBA = {
-  3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u
-};
-
-// Convert ARGB to RGBA.
-LIBYUV_API
-int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height) {
-  return ARGBShuffle(src_argb, src_stride_argb,
-                     dst_rgba, dst_stride_rgba,
-                     (const uint8*)(&kShuffleMaskARGBToRGBA),
-                     width, height);
-}
-
-// Convert ARGB To RGB24.
-LIBYUV_API
-int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
-                uint8* dst_rgb24, int dst_stride_rgb24,
-                int width, int height) {
-  if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_rgb24 == width * 3) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_rgb24 = 0;
-  }
-  void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
-      ARGBToRGB24Row_C;
-#if defined(HAS_ARGBTORGB24ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
-    }
-  }
-#elif defined(HAS_ARGBTORGB24ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToRGB24Row = ARGBToRGB24Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToRGB24Row(src_argb, dst_rgb24, width);
-    src_argb += src_stride_argb;
-    dst_rgb24 += dst_stride_rgb24;
-  }
-  return 0;
-}
-
-// Convert ARGB To RAW.
-LIBYUV_API
-int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
-              uint8* dst_raw, int dst_stride_raw,
-              int width, int height) {
-  if (!src_argb || !dst_raw || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_raw == width * 3) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_raw = 0;
-  }
-  void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
-      ARGBToRAWRow_C;
-#if defined(HAS_ARGBTORAWROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToRAWRow = ARGBToRAWRow_SSSE3;
-    }
-  }
-#elif defined(HAS_ARGBTORAWROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToRAWRow = ARGBToRAWRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToRAWRow = ARGBToRAWRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToRAWRow(src_argb, dst_raw, width);
-    src_argb += src_stride_argb;
-    dst_raw += dst_stride_raw;
-  }
-  return 0;
-}
-
-// Convert ARGB To RGB565.
-LIBYUV_API
-int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height) {
-  if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_rgb565 == width * 2) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_rgb565 = 0;
-  }
-  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
-      ARGBToRGB565Row_C;
-#if defined(HAS_ARGBTORGB565ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-    ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
-    }
-  }
-#elif defined(HAS_ARGBTORGB565ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToRGB565Row = ARGBToRGB565Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToRGB565Row(src_argb, dst_rgb565, width);
-    src_argb += src_stride_argb;
-    dst_rgb565 += dst_stride_rgb565;
-  }
-  return 0;
-}
-
-// Convert ARGB To ARGB1555.
-LIBYUV_API
-int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
-                   uint8* dst_argb1555, int dst_stride_argb1555,
-                   int width, int height) {
-  if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb1555 == width * 2) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb1555 = 0;
-  }
-  void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
-      ARGBToARGB1555Row_C;
-#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-    ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
-    }
-  }
-#elif defined(HAS_ARGBTOARGB1555ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToARGB1555Row(src_argb, dst_argb1555, width);
-    src_argb += src_stride_argb;
-    dst_argb1555 += dst_stride_argb1555;
-  }
-  return 0;
-}
-
-// Convert ARGB To ARGB4444.
-LIBYUV_API
-int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
-                   uint8* dst_argb4444, int dst_stride_argb4444,
-                   int width, int height) {
-  if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb4444 == width * 2) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb4444 = 0;
-  }
-  void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
-      ARGBToARGB4444Row_C;
-#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-    ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
-    }
-  }
-#elif defined(HAS_ARGBTOARGB4444ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToARGB4444Row = ARGBToARGB4444Row_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToARGB4444Row(src_argb, dst_argb4444, width);
-    src_argb += src_stride_argb;
-    dst_argb4444 += dst_stride_argb4444;
-  }
-  return 0;
-}
-
-// Convert ARGB to J420. (JPeg full range I420).
-LIBYUV_API
-int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_yj, int dst_stride_yj,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  if (!src_argb ||
-      !dst_yj || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
-  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
-      ARGBToYJRow_C;
-#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
-    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
-      ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUVJRow = ARGBToUVJRow_SSSE3;
-        if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
-          ARGBToYJRow = ARGBToYJRow_SSSE3;
-        }
-      }
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToYJRow = ARGBToYJRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYJRow = ARGBToYJRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYJRow = ARGBToYJRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUVJRow = ARGBToUVJRow_NEON;
-      }
-    }
-  }
-#endif
-
-  for (int y = 0; y < height - 1; y += 2) {
-    ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
-    ARGBToYJRow(src_argb, dst_yj, width);
-    ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
-    src_argb += src_stride_argb * 2;
-    dst_yj += dst_stride_yj * 2;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  if (height & 1) {
-    ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
-    ARGBToYJRow(src_argb, dst_yj, width);
-  }
-  return 0;
-}
-
-// Convert ARGB to J400.
-LIBYUV_API
-int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_yj, int dst_stride_yj,
-               int width, int height) {
-  if (!src_argb || !dst_yj || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_yj == width) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_yj = 0;
-  }
-  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
-      ARGBToYJRow_C;
-#if defined(HAS_ARGBTOYJROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-          IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
-        ARGBToYJRow = ARGBToYJRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYJROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      ARGBToYJRow = ARGBToYJRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYJROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYJRow = ARGBToYJRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYJRow = ARGBToYJRow_NEON;
-    }
-  }
-#endif
-
-  for (int y = 0; y < height; ++y) {
-    ARGBToYJRow(src_argb, dst_yj, width);
-    src_argb += src_stride_argb;
-    dst_yj += dst_stride_yj;
-  }
-  return 0;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc
deleted file mode 100755
index bcb980f7f1..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert.h"
-
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#ifdef HAVE_JPEG
-struct I420Buffers {
-  uint8* y;
-  int y_stride;
-  uint8* u;
-  int u_stride;
-  uint8* v;
-  int v_stride;
-  int w;
-  int h;
-};
-
-static void JpegCopyI420(void* opaque,
-                         const uint8* const* data,
-                         const int* strides,
-                         int rows) {
-  I420Buffers* dest = (I420Buffers*)(opaque);
-  I420Copy(data[0], strides[0],
-           data[1], strides[1],
-           data[2], strides[2],
-           dest->y, dest->y_stride,
-           dest->u, dest->u_stride,
-           dest->v, dest->v_stride,
-           dest->w, rows);
-  dest->y += rows * dest->y_stride;
-  dest->u += ((rows + 1) >> 1) * dest->u_stride;
-  dest->v += ((rows + 1) >> 1) * dest->v_stride;
-  dest->h -= rows;
-}
-
-static void JpegI422ToI420(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  I420Buffers* dest = (I420Buffers*)(opaque);
-  I422ToI420(data[0], strides[0],
-             data[1], strides[1],
-             data[2], strides[2],
-             dest->y, dest->y_stride,
-             dest->u, dest->u_stride,
-             dest->v, dest->v_stride,
-             dest->w, rows);
-  dest->y += rows * dest->y_stride;
-  dest->u += ((rows + 1) >> 1) * dest->u_stride;
-  dest->v += ((rows + 1) >> 1) * dest->v_stride;
-  dest->h -= rows;
-}
-
-static void JpegI444ToI420(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  I420Buffers* dest = (I420Buffers*)(opaque);
-  I444ToI420(data[0], strides[0],
-             data[1], strides[1],
-             data[2], strides[2],
-             dest->y, dest->y_stride,
-             dest->u, dest->u_stride,
-             dest->v, dest->v_stride,
-             dest->w, rows);
-  dest->y += rows * dest->y_stride;
-  dest->u += ((rows + 1) >> 1) * dest->u_stride;
-  dest->v += ((rows + 1) >> 1) * dest->v_stride;
-  dest->h -= rows;
-}
-
-static void JpegI411ToI420(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  I420Buffers* dest = (I420Buffers*)(opaque);
-  I411ToI420(data[0], strides[0],
-             data[1], strides[1],
-             data[2], strides[2],
-             dest->y, dest->y_stride,
-             dest->u, dest->u_stride,
-             dest->v, dest->v_stride,
-             dest->w, rows);
-  dest->y += rows * dest->y_stride;
-  dest->u += ((rows + 1) >> 1) * dest->u_stride;
-  dest->v += ((rows + 1) >> 1) * dest->v_stride;
-  dest->h -= rows;
-}
-
-static void JpegI400ToI420(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  I420Buffers* dest = (I420Buffers*)(opaque);
-  I400ToI420(data[0], strides[0],
-             dest->y, dest->y_stride,
-             dest->u, dest->u_stride,
-             dest->v, dest->v_stride,
-             dest->w, rows);
-  dest->y += rows * dest->y_stride;
-  dest->u += ((rows + 1) >> 1) * dest->u_stride;
-  dest->v += ((rows + 1) >> 1) * dest->v_stride;
-  dest->h -= rows;
-}
-
-// Query size of MJPG in pixels.
-LIBYUV_API
-int MJPGSize(const uint8* sample, size_t sample_size,
-             int* width, int* height) {
-  MJpegDecoder mjpeg_decoder;
-  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
-  if (ret) {
-    *width = mjpeg_decoder.GetWidth();
-    *height = mjpeg_decoder.GetHeight();
-  }
-  mjpeg_decoder.UnloadFrame();
-  return ret ? 0 : -1;  // -1 for runtime failure.
-}
-
-// MJPG (Motion JPeg) to I420
-// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
-LIBYUV_API
-int MJPGToI420(const uint8* sample,
-               size_t sample_size,
-               uint8* y, int y_stride,
-               uint8* u, int u_stride,
-               uint8* v, int v_stride,
-               int w, int h,
-               int dw, int dh) {
-  if (sample_size == kUnknownDataSize) {
-    // ERROR: MJPEG frame size unknown
-    return -1;
-  }
-
-  // TODO(fbarchard): Port MJpeg to C.
-  MJpegDecoder mjpeg_decoder;
-  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
-  if (ret && (mjpeg_decoder.GetWidth() != w ||
-              mjpeg_decoder.GetHeight() != h)) {
-    // ERROR: MJPEG frame has unexpected dimensions
-    mjpeg_decoder.UnloadFrame();
-    return 1;  // runtime failure
-  }
-  if (ret) {
-    I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
-    // YUV420
-    if (mjpeg_decoder.GetColorSpace() ==
-            MJpegDecoder::kColorSpaceYCbCr &&
-        mjpeg_decoder.GetNumComponents() == 3 &&
-        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
-        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
-        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
-    // YUV422
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceYCbCr &&
-               mjpeg_decoder.GetNumComponents() == 3 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
-               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
-    // YUV444
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceYCbCr &&
-               mjpeg_decoder.GetNumComponents() == 3 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
-    // YUV411
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceYCbCr &&
-               mjpeg_decoder.GetNumComponents() == 3 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
-               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
-    // YUV400
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceGrayscale &&
-               mjpeg_decoder.GetNumComponents() == 1 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
-    } else {
-      // TODO(fbarchard): Implement conversion for any other colorspace/sample
-      // factors that occur in practice. 411 is supported by libjpeg
-      // ERROR: Unable to convert MJPEG frame because format is not supported
-      mjpeg_decoder.UnloadFrame();
-      return 1;
-    }
-  }
-  return ret ? 0 : 1;
-}
-
-#ifdef HAVE_JPEG
-struct ARGBBuffers {
-  uint8* argb;
-  int argb_stride;
-  int w;
-  int h;
-};
-
-static void JpegI420ToARGB(void* opaque,
-                         const uint8* const* data,
-                         const int* strides,
-                         int rows) {
-  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
-  I420ToARGB(data[0], strides[0],
-             data[1], strides[1],
-             data[2], strides[2],
-             dest->argb, dest->argb_stride,
-             dest->w, rows);
-  dest->argb += rows * dest->argb_stride;
-  dest->h -= rows;
-}
-
-static void JpegI422ToARGB(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
-  I422ToARGB(data[0], strides[0],
-             data[1], strides[1],
-             data[2], strides[2],
-             dest->argb, dest->argb_stride,
-             dest->w, rows);
-  dest->argb += rows * dest->argb_stride;
-  dest->h -= rows;
-}
-
-static void JpegI444ToARGB(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
-  I444ToARGB(data[0], strides[0],
-             data[1], strides[1],
-             data[2], strides[2],
-             dest->argb, dest->argb_stride,
-             dest->w, rows);
-  dest->argb += rows * dest->argb_stride;
-  dest->h -= rows;
-}
-
-static void JpegI411ToARGB(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
-  I411ToARGB(data[0], strides[0],
-             data[1], strides[1],
-             data[2], strides[2],
-             dest->argb, dest->argb_stride,
-             dest->w, rows);
-  dest->argb += rows * dest->argb_stride;
-  dest->h -= rows;
-}
-
-static void JpegI400ToARGB(void* opaque,
-                           const uint8* const* data,
-                           const int* strides,
-                           int rows) {
-  ARGBBuffers* dest = (ARGBBuffers*)(opaque);
-  I400ToARGB(data[0], strides[0],
-             dest->argb, dest->argb_stride,
-             dest->w, rows);
-  dest->argb += rows * dest->argb_stride;
-  dest->h -= rows;
-}
-
-// MJPG (Motion JPeg) to ARGB
-// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample,
-               size_t sample_size,
-               uint8* argb, int argb_stride,
-               int w, int h,
-               int dw, int dh) {
-  if (sample_size == kUnknownDataSize) {
-    // ERROR: MJPEG frame size unknown
-    return -1;
-  }
-
-  // TODO(fbarchard): Port MJpeg to C.
-  MJpegDecoder mjpeg_decoder;
-  LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
-  if (ret && (mjpeg_decoder.GetWidth() != w ||
-              mjpeg_decoder.GetHeight() != h)) {
-    // ERROR: MJPEG frame has unexpected dimensions
-    mjpeg_decoder.UnloadFrame();
-    return 1;  // runtime failure
-  }
-  if (ret) {
-    ARGBBuffers bufs = { argb, argb_stride, dw, dh };
-    // YUV420
-    if (mjpeg_decoder.GetColorSpace() ==
-            MJpegDecoder::kColorSpaceYCbCr &&
-        mjpeg_decoder.GetNumComponents() == 3 &&
-        mjpeg_decoder.GetVertSampFactor(0) == 2 &&
-        mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
-        mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-        mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-        mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-        mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
-    // YUV422
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceYCbCr &&
-               mjpeg_decoder.GetNumComponents() == 3 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
-               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
-    // YUV444
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceYCbCr &&
-               mjpeg_decoder.GetNumComponents() == 3 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
-    // YUV411
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceYCbCr &&
-               mjpeg_decoder.GetNumComponents() == 3 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
-               mjpeg_decoder.GetVertSampFactor(1) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
-               mjpeg_decoder.GetVertSampFactor(2) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(2) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
-    // YUV400
-    } else if (mjpeg_decoder.GetColorSpace() ==
-                   MJpegDecoder::kColorSpaceGrayscale &&
-               mjpeg_decoder.GetNumComponents() == 1 &&
-               mjpeg_decoder.GetVertSampFactor(0) == 1 &&
-               mjpeg_decoder.GetHorizSampFactor(0) == 1) {
-      ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
-    } else {
-      // TODO(fbarchard): Implement conversion for any other colorspace/sample
-      // factors that occur in practice. 411 is supported by libjpeg
-      // ERROR: Unable to convert MJPEG frame because format is not supported
-      mjpeg_decoder.UnloadFrame();
-      return 1;
-    }
-  }
-  return ret ? 0 : 1;
-}
-#endif
-
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc
deleted file mode 100755
index 1b228a7b4d..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/convert_argb.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/format_conversion.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/rotate_argb.h"
-#include "libyuv/row.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// src_width is used for source stride computation
-// src_height is used to compute location of planes, and indicate inversion
-// sample_size is measured in bytes and is the size of the frame.
-//   With MJPEG it is the compressed size of the frame.
-LIBYUV_API
-int ConvertToARGB(const uint8* sample, size_t sample_size,
-                  uint8* crop_argb, int argb_stride,
-                  int crop_x, int crop_y,
-                  int src_width, int src_height,
-                  int crop_width, int crop_height,
-                  enum RotationMode rotation,
-                  uint32 fourcc) {
-  uint32 format = CanonicalFourCC(fourcc);
-  int aligned_src_width = (src_width + 1) & ~1;
-  const uint8* src;
-  const uint8* src_uv;
-  int abs_src_height = (src_height < 0) ? -src_height : src_height;
-  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-  int r = 0;
-
-  // One pass rotation is available for some formats. For the rest, convert
-  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
-  // and then rotate the I420 to the final destination buffer.
-  // For in-place conversion, if destination crop_argb is same as source sample,
-  // also enable temporary buffer.
-  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
-      crop_argb == sample;
-  uint8* tmp_argb = crop_argb;
-  int tmp_argb_stride = argb_stride;
-  uint8* rotate_buffer = NULL;
-  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-
-  if (crop_argb == NULL || sample == NULL ||
-      src_width <= 0 || crop_width <= 0 ||
-      src_height == 0 || crop_height == 0) {
-    return -1;
-  }
-  if (src_height < 0) {
-    inv_crop_height = -inv_crop_height;
-  }
-
-  if (need_buf) {
-    int argb_size = crop_width * abs_crop_height * 4;
-    rotate_buffer = (uint8*)malloc(argb_size);
-    if (!rotate_buffer) {
-      return 1;  // Out of memory runtime error.
-    }
-    crop_argb = rotate_buffer;
-    argb_stride = crop_width;
-  }
-
-  switch (format) {
-    // Single plane formats
-    case FOURCC_YUY2:
-      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
-      r = YUY2ToARGB(src, aligned_src_width * 2,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_UYVY:
-      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
-      r = UYVYToARGB(src, aligned_src_width * 2,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_24BG:
-      src = sample + (src_width * crop_y + crop_x) * 3;
-      r = RGB24ToARGB(src, src_width * 3,
-                      crop_argb, argb_stride,
-                      crop_width, inv_crop_height);
-      break;
-    case FOURCC_RAW:
-      src = sample + (src_width * crop_y + crop_x) * 3;
-      r = RAWToARGB(src, src_width * 3,
-                    crop_argb, argb_stride,
-                    crop_width, inv_crop_height);
-      break;
-    case FOURCC_ARGB:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = ARGBToARGB(src, src_width * 4,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_BGRA:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = BGRAToARGB(src, src_width * 4,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_ABGR:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = ABGRToARGB(src, src_width * 4,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_RGBA:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = RGBAToARGB(src, src_width * 4,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_RGBP:
-      src = sample + (src_width * crop_y + crop_x) * 2;
-      r = RGB565ToARGB(src, src_width * 2,
-                       crop_argb, argb_stride,
-                       crop_width, inv_crop_height);
-      break;
-    case FOURCC_RGBO:
-      src = sample + (src_width * crop_y + crop_x) * 2;
-      r = ARGB1555ToARGB(src, src_width * 2,
-                         crop_argb, argb_stride,
-                         crop_width, inv_crop_height);
-      break;
-    case FOURCC_R444:
-      src = sample + (src_width * crop_y + crop_x) * 2;
-      r = ARGB4444ToARGB(src, src_width * 2,
-                         crop_argb, argb_stride,
-                         crop_width, inv_crop_height);
-      break;
-    // TODO(fbarchard): Support cropping Bayer by odd numbers
-    // by adjusting fourcc.
-    case FOURCC_BGGR:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerBGGRToARGB(src, src_width,
-                          crop_argb, argb_stride,
-                          crop_width, inv_crop_height);
-      break;
-
-    case FOURCC_GBRG:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerGBRGToARGB(src, src_width,
-                          crop_argb, argb_stride,
-                          crop_width, inv_crop_height);
-      break;
-
-    case FOURCC_GRBG:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerGRBGToARGB(src, src_width,
-                          crop_argb, argb_stride,
-                          crop_width, inv_crop_height);
-      break;
-
-    case FOURCC_RGGB:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerRGGBToARGB(src, src_width,
-                          crop_argb, argb_stride,
-                          crop_width, inv_crop_height);
-      break;
-
-    case FOURCC_I400:
-      src = sample + src_width * crop_y + crop_x;
-      r = I400ToARGB(src, src_width,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-
-    // Biplanar formats
-    case FOURCC_NV12:
-      src = sample + (src_width * crop_y + crop_x);
-      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
-      r = NV12ToARGB(src, src_width,
-                     src_uv, aligned_src_width,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_NV21:
-      src = sample + (src_width * crop_y + crop_x);
-      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
-      // Call NV12 but with u and v parameters swapped.
-      r = NV21ToARGB(src, src_width,
-                     src_uv, aligned_src_width,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_M420:
-      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
-      r = M420ToARGB(src, src_width,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-//    case FOURCC_Q420:
-//      src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
-//      src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
-//               src_width + crop_x * 2;
-//      r = Q420ToARGB(src, src_width * 3,
-//                    src_uv, src_width * 3,
-//                    crop_argb, argb_stride,
-//                    crop_width, inv_crop_height);
-//      break;
-    // Triplanar formats
-    case FOURCC_I420:
-    case FOURCC_YU12:
-    case FOURCC_YV12: {
-      const uint8* src_y = sample + (src_width * crop_y + crop_x);
-      const uint8* src_u;
-      const uint8* src_v;
-      int halfwidth = (src_width + 1) / 2;
-      int halfheight = (abs_src_height + 1) / 2;
-      if (format == FOURCC_YV12) {
-        src_v = sample + src_width * abs_src_height +
-            (halfwidth * crop_y + crop_x) / 2;
-        src_u = sample + src_width * abs_src_height +
-            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
-      } else {
-        src_u = sample + src_width * abs_src_height +
-            (halfwidth * crop_y + crop_x) / 2;
-        src_v = sample + src_width * abs_src_height +
-            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
-      }
-      r = I420ToARGB(src_y, src_width,
-                     src_u, halfwidth,
-                     src_v, halfwidth,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    }
-    case FOURCC_I422:
-    case FOURCC_YV16: {
-      const uint8* src_y = sample + src_width * crop_y + crop_x;
-      const uint8* src_u;
-      const uint8* src_v;
-      int halfwidth = (src_width + 1) / 2;
-      if (format == FOURCC_YV16) {
-        src_v = sample + src_width * abs_src_height +
-            halfwidth * crop_y + crop_x / 2;
-        src_u = sample + src_width * abs_src_height +
-            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
-      } else {
-        src_u = sample + src_width * abs_src_height +
-            halfwidth * crop_y + crop_x / 2;
-        src_v = sample + src_width * abs_src_height +
-            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
-      }
-      r = I422ToARGB(src_y, src_width,
-                     src_u, halfwidth,
-                     src_v, halfwidth,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    }
-    case FOURCC_I444:
-    case FOURCC_YV24: {
-      const uint8* src_y = sample + src_width * crop_y + crop_x;
-      const uint8* src_u;
-      const uint8* src_v;
-      if (format == FOURCC_YV24) {
-        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
-        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
-      } else {
-        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
-        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
-      }
-      r = I444ToARGB(src_y, src_width,
-                     src_u, src_width,
-                     src_v, src_width,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    }
-    case FOURCC_I411: {
-      int quarterwidth = (src_width + 3) / 4;
-      const uint8* src_y = sample + src_width * crop_y + crop_x;
-      const uint8* src_u = sample + src_width * abs_src_height +
-          quarterwidth * crop_y + crop_x / 4;
-      const uint8* src_v = sample + src_width * abs_src_height +
-          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
-      r = I411ToARGB(src_y, src_width,
-                     src_u, quarterwidth,
-                     src_v, quarterwidth,
-                     crop_argb, argb_stride,
-                     crop_width, inv_crop_height);
-      break;
-    }
-#ifdef HAVE_JPEG
-    case FOURCC_MJPG:
-      r = MJPGToARGB(sample, sample_size,
-                     crop_argb, argb_stride,
-                     src_width, abs_src_height, crop_width, inv_crop_height);
-      break;
-#endif
-    default:
-      r = -1;  // unknown fourcc - return failure code.
-  }
-
-  if (need_buf) {
-    if (!r) {
-      r = ARGBRotate(crop_argb, argb_stride,
-                     tmp_argb, tmp_argb_stride,
-                     crop_width, abs_crop_height, rotation);
-    }
-    free(rotate_buffer);
-  }
-
-  return r;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc
deleted file mode 100755
index 7b194fff72..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-
-#include "libyuv/convert.h"
-
-#include "libyuv/format_conversion.h"
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Convert camera sample to I420 with cropping, rotation and vertical flip.
-// src_width is used for source stride computation
-// src_height is used to compute location of planes, and indicate inversion
-// sample_size is measured in bytes and is the size of the frame.
-//   With MJPEG it is the compressed size of the frame.
-LIBYUV_API
-int ConvertToI420(const uint8* sample,
-                  size_t sample_size,
-                  uint8* y, int y_stride,
-                  uint8* u, int u_stride,
-                  uint8* v, int v_stride,
-                  int crop_x, int crop_y,
-                  int src_width, int src_height,
-                  int crop_width, int crop_height,
-                  enum RotationMode rotation,
-                  uint32 fourcc) {
-  uint32 format = CanonicalFourCC(fourcc);
-  int aligned_src_width = (src_width + 1) & ~1;
-  const uint8* src;
-  const uint8* src_uv;
-  int abs_src_height = (src_height < 0) ? -src_height : src_height;
-  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-  int r = 0;
-  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
-      format != FOURCC_NV12 && format != FOURCC_NV21 &&
-      format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
-  uint8* tmp_y = y;
-  uint8* tmp_u = u;
-  uint8* tmp_v = v;
-  int tmp_y_stride = y_stride;
-  int tmp_u_stride = u_stride;
-  int tmp_v_stride = v_stride;
-  uint8* rotate_buffer = NULL;
-  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
-
-  if (!y || !u || !v || !sample ||
-      src_width <= 0 || crop_width <= 0  ||
-      src_height == 0 || crop_height == 0) {
-    return -1;
-  }
-  if (src_height < 0) {
-    inv_crop_height = -inv_crop_height;
-  }
-
-  // One pass rotation is available for some formats. For the rest, convert
-  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
-  // and then rotate the I420 to the final destination buffer.
-  // For in-place conversion, if destination y is same as source sample,
-  // also enable temporary buffer.
-  if (need_buf) {
-    int y_size = crop_width * abs_crop_height;
-    int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
-    rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
-    if (!rotate_buffer) {
-      return 1;  // Out of memory runtime error.
-    }
-    y = rotate_buffer;
-    u = y + y_size;
-    v = u + uv_size;
-    y_stride = crop_width;
-    u_stride = v_stride = ((crop_width + 1) / 2);
-  }
-
-  switch (format) {
-    // Single plane formats
-    case FOURCC_YUY2:
-      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
-      r = YUY2ToI420(src, aligned_src_width * 2,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_UYVY:
-      src = sample + (aligned_src_width * crop_y + crop_x) * 2;
-      r = UYVYToI420(src, aligned_src_width * 2,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_RGBP:
-      src = sample + (src_width * crop_y + crop_x) * 2;
-      r = RGB565ToI420(src, src_width * 2,
-                       y, y_stride,
-                       u, u_stride,
-                       v, v_stride,
-                       crop_width, inv_crop_height);
-      break;
-    case FOURCC_RGBO:
-      src = sample + (src_width * crop_y + crop_x) * 2;
-      r = ARGB1555ToI420(src, src_width * 2,
-                         y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         crop_width, inv_crop_height);
-      break;
-    case FOURCC_R444:
-      src = sample + (src_width * crop_y + crop_x) * 2;
-      r = ARGB4444ToI420(src, src_width * 2,
-                         y, y_stride,
-                         u, u_stride,
-                         v, v_stride,
-                         crop_width, inv_crop_height);
-      break;
-    case FOURCC_24BG:
-      src = sample + (src_width * crop_y + crop_x) * 3;
-      r = RGB24ToI420(src, src_width * 3,
-                      y, y_stride,
-                      u, u_stride,
-                      v, v_stride,
-                      crop_width, inv_crop_height);
-      break;
-    case FOURCC_RAW:
-      src = sample + (src_width * crop_y + crop_x) * 3;
-      r = RAWToI420(src, src_width * 3,
-                    y, y_stride,
-                    u, u_stride,
-                    v, v_stride,
-                    crop_width, inv_crop_height);
-      break;
-    case FOURCC_ARGB:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = ARGBToI420(src, src_width * 4,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_BGRA:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = BGRAToI420(src, src_width * 4,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_ABGR:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = ABGRToI420(src, src_width * 4,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_RGBA:
-      src = sample + (src_width * crop_y + crop_x) * 4;
-      r = RGBAToI420(src, src_width * 4,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    // TODO(fbarchard): Support cropping Bayer by odd numbers
-    // by adjusting fourcc.
-    case FOURCC_BGGR:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerBGGRToI420(src, src_width,
-                          y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          crop_width, inv_crop_height);
-      break;
-    case FOURCC_GBRG:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerGBRGToI420(src, src_width,
-                          y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          crop_width, inv_crop_height);
-      break;
-    case FOURCC_GRBG:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerGRBGToI420(src, src_width,
-                          y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          crop_width, inv_crop_height);
-      break;
-    case FOURCC_RGGB:
-      src = sample + (src_width * crop_y + crop_x);
-      r = BayerRGGBToI420(src, src_width,
-                          y, y_stride,
-                          u, u_stride,
-                          v, v_stride,
-                          crop_width, inv_crop_height);
-      break;
-    case FOURCC_I400:
-      src = sample + src_width * crop_y + crop_x;
-      r = I400ToI420(src, src_width,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    // Biplanar formats
-    case FOURCC_NV12:
-      src = sample + (src_width * crop_y + crop_x);
-      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
-      r = NV12ToI420Rotate(src, src_width,
-                           src_uv, aligned_src_width,
-                           y, y_stride,
-                           u, u_stride,
-                           v, v_stride,
-                           crop_width, inv_crop_height, rotation);
-      break;
-    case FOURCC_NV21:
-      src = sample + (src_width * crop_y + crop_x);
-      src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
-      // Call NV12 but with u and v parameters swapped.
-      r = NV12ToI420Rotate(src, src_width,
-                           src_uv, aligned_src_width,
-                           y, y_stride,
-                           v, v_stride,
-                           u, u_stride,
-                           crop_width, inv_crop_height, rotation);
-      break;
-    case FOURCC_M420:
-      src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
-      r = M420ToI420(src, src_width,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    case FOURCC_Q420:
-      src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
-      src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
-               src_width + crop_x * 2;
-      r = Q420ToI420(src, src_width * 3,
-                    src_uv, src_width * 3,
-                    y, y_stride,
-                    u, u_stride,
-                    v, v_stride,
-                    crop_width, inv_crop_height);
-      break;
-    // Triplanar formats
-    case FOURCC_I420:
-    case FOURCC_YU12:
-    case FOURCC_YV12: {
-      const uint8* src_y = sample + (src_width * crop_y + crop_x);
-      const uint8* src_u;
-      const uint8* src_v;
-      int halfwidth = (src_width + 1) / 2;
-      int halfheight = (abs_src_height + 1) / 2;
-      if (format == FOURCC_YV12) {
-        src_v = sample + src_width * abs_src_height +
-            (halfwidth * crop_y + crop_x) / 2;
-        src_u = sample + src_width * abs_src_height +
-            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
-      } else {
-        src_u = sample + src_width * abs_src_height +
-            (halfwidth * crop_y + crop_x) / 2;
-        src_v = sample + src_width * abs_src_height +
-            halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
-      }
-      r = I420Rotate(src_y, src_width,
-                     src_u, halfwidth,
-                     src_v, halfwidth,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height, rotation);
-      break;
-    }
-    case FOURCC_I422:
-    case FOURCC_YV16: {
-      const uint8* src_y = sample + src_width * crop_y + crop_x;
-      const uint8* src_u;
-      const uint8* src_v;
-      int halfwidth = (src_width + 1) / 2;
-      if (format == FOURCC_YV16) {
-        src_v = sample + src_width * abs_src_height +
-            halfwidth * crop_y + crop_x / 2;
-        src_u = sample + src_width * abs_src_height +
-            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
-      } else {
-        src_u = sample + src_width * abs_src_height +
-            halfwidth * crop_y + crop_x / 2;
-        src_v = sample + src_width * abs_src_height +
-            halfwidth * (abs_src_height + crop_y) + crop_x / 2;
-      }
-      r = I422ToI420(src_y, src_width,
-                     src_u, halfwidth,
-                     src_v, halfwidth,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    }
-    case FOURCC_I444:
-    case FOURCC_YV24: {
-      const uint8* src_y = sample + src_width * crop_y + crop_x;
-      const uint8* src_u;
-      const uint8* src_v;
-      if (format == FOURCC_YV24) {
-        src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
-        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
-      } else {
-        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
-        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
-      }
-      r = I444ToI420(src_y, src_width,
-                     src_u, src_width,
-                     src_v, src_width,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    }
-    case FOURCC_I411: {
-      int quarterwidth = (src_width + 3) / 4;
-      const uint8* src_y = sample + src_width * crop_y + crop_x;
-      const uint8* src_u = sample + src_width * abs_src_height +
-          quarterwidth * crop_y + crop_x / 4;
-      const uint8* src_v = sample + src_width * abs_src_height +
-          quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
-      r = I411ToI420(src_y, src_width,
-                     src_u, quarterwidth,
-                     src_v, quarterwidth,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     crop_width, inv_crop_height);
-      break;
-    }
-#ifdef HAVE_JPEG
-    case FOURCC_MJPG:
-      r = MJPGToI420(sample, sample_size,
-                     y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     src_width, abs_src_height, crop_width, inv_crop_height);
-      break;
-#endif
-    default:
-      r = -1;  // unknown fourcc - return failure code.
-  }
-
-  if (need_buf) {
-    if (!r) {
-      r = I420Rotate(y, y_stride,
-                     u, u_stride,
-                     v, v_stride,
-                     tmp_y, tmp_y_stride,
-                     tmp_u, tmp_u_stride,
-                     tmp_v, tmp_v_stride,
-                     crop_width, abs_crop_height, rotation);
-    }
-    free(rotate_buffer);
-  }
-
-  return r;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc b/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc
deleted file mode 100755
index f52bd95551..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/cpu_id.h"
-
-#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection
-#include "cpu-features.h"
-#endif
-
-#ifdef _MSC_VER
-#include <intrin.h>  // For __cpuidex()
-#endif
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
-    !defined(__native_client__) && defined(_M_X64) && \
-    defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
-#include <immintrin.h>  // For _xgetbv()
-#endif
-
-#if !defined(__native_client__)
-#include <stdlib.h>  // For getenv()
-#endif
-
-// For ArmCpuCaps() but unittested on all platforms
-#include <stdio.h>
-#include <string.h>
-
-#include "libyuv/basic_types.h"  // For CPU_X86
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// For functions that use the stack and have runtime checks for overflow,
-// use SAFEBUFFERS to avoid additional check.
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
-#define SAFEBUFFERS __declspec(safebuffers)
-#else
-#define SAFEBUFFERS
-#endif
-
-// Low level cpuid for X86. Returns zeros on other CPUs.
-#if !defined(__pnacl__) && !defined(__CLR_VER) && \
-    (defined(_M_IX86) || defined(_M_X64) || \
-    defined(__i386__) || defined(__x86_64__))
-LIBYUV_API
-void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
-#if defined(_MSC_VER)
-#if (_MSC_FULL_VER >= 160040219)
-  __cpuidex((int*)(cpu_info), info_eax, info_ecx);
-#elif defined(_M_IX86)
-  __asm {
-    mov        eax, info_eax
-    mov        ecx, info_ecx
-    mov        edi, cpu_info
-    cpuid
-    mov        [edi], eax
-    mov        [edi + 4], ebx
-    mov        [edi + 8], ecx
-    mov        [edi + 12], edx
-  }
-#else
-  if (info_ecx == 0) {
-    __cpuid((int*)(cpu_info), info_eax);
-  } else {
-    cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
-  }
-#endif
-#else  // defined(_MSC_VER)
-  uint32 info_ebx, info_edx;
-  asm volatile (  // NOLINT
-#if defined( __i386__) && defined(__PIC__)
-    // Preserve ebx for fpic 32 bit.
-    "mov %%ebx, %%edi                          \n"
-    "cpuid                                     \n"
-    "xchg %%edi, %%ebx                         \n"
-    : "=D" (info_ebx),
-#else
-    "cpuid                                     \n"
-    : "=b" (info_ebx),
-#endif  //  defined( __i386__) && defined(__PIC__)
-      "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
-  cpu_info[0] = info_eax;
-  cpu_info[1] = info_ebx;
-  cpu_info[2] = info_ecx;
-  cpu_info[3] = info_edx;
-#endif  // defined(_MSC_VER)
-}
-
-#if !defined(__native_client__)
-#define HAS_XGETBV
-// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-int TestOsSaveYmm() {
-  uint32 xcr0 = 0u;
-#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
-  xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required.
-#elif defined(_M_IX86)
-  __asm {
-    xor        ecx, ecx    // xcr 0
-    _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
-    mov        xcr0, eax
-  }
-#elif defined(__i386__) || defined(__x86_64__)
-  asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
-#endif  // defined(_MSC_VER)
-  return((xcr0 & 6) == 6);  // Is ymm saved?
-}
-#endif  // !defined(__native_client__)
-#else
-LIBYUV_API
-void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
-  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
-}
-#endif
-
-// based on libvpx arm_cpudetect.c
-// For Arm, but public to allow testing on any CPU
-LIBYUV_API SAFEBUFFERS
-int ArmCpuCaps(const char* cpuinfo_name) {
-  char cpuinfo_line[512];
-  FILE* f = fopen(cpuinfo_name, "r");
-  if (!f) {
-    // Assume Neon if /proc/cpuinfo is unavailable.
-    // This will occur for Chrome sandbox for Pepper or Render process.
-    return kCpuHasNEON;
-  }
-  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
-    if (memcmp(cpuinfo_line, "Features", 8) == 0) {
-      char* p = strstr(cpuinfo_line, " neon");
-      if (p && (p[5] == ' ' || p[5] == '\n')) {
-        fclose(f);
-        return kCpuHasNEON;
-      }
-    }
-  }
-  fclose(f);
-  return 0;
-}
-
-#if defined(__mips__) && defined(__linux__)
-static int MipsCpuCaps(const char* search_string) {
-  char cpuinfo_line[512];
-  const char* file_name = "/proc/cpuinfo";
-  FILE* f = fopen(file_name, "r");
-  if (!f) {
-    // Assume DSP if /proc/cpuinfo is unavailable.
-    // This will occur for Chrome sandbox for Pepper or Render process.
-    return kCpuHasMIPS_DSP;
-  }
-  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
-    if (strstr(cpuinfo_line, search_string) != NULL) {
-      fclose(f);
-      return kCpuHasMIPS_DSP;
-    }
-  }
-  fclose(f);
-  return 0;
-}
-#endif
-
-// CPU detect function for SIMD instruction sets.
-LIBYUV_API
-int cpu_info_ = kCpuInit;  // cpu_info is not initialized yet.
-
-// Test environment variable for disabling CPU features. Any non-zero value
-// to disable. Zero ignored to make it easy to set the variable on/off.
-#if !defined(__native_client__) && !defined(_M_ARM)
-
-static LIBYUV_BOOL TestEnv(const char* name) {
-#ifndef _WINRT
-  const char* var = getenv(name);
-  if (var) {
-    if (var[0] != '0') {
-      return LIBYUV_TRUE;
-    }
-  }
-#endif
-  return LIBYUV_FALSE;
-}
-#else  // nacl does not support getenv().
-static LIBYUV_BOOL TestEnv(const char*) {
-  return LIBYUV_FALSE;
-}
-#endif
-
-LIBYUV_API SAFEBUFFERS
-int InitCpuFlags(void) {
-#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
-
-  uint32 cpu_info1[4] = { 0, 0, 0, 0 };
-  uint32 cpu_info7[4] = { 0, 0, 0, 0 };
-  CpuId(1, 0, cpu_info1);
-  CpuId(7, 0, cpu_info7);
-  cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
-              ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
-              ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
-              ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
-              ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
-              ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
-              kCpuHasX86;
-#ifdef HAS_XGETBV
-  if ((cpu_info1[2] & 0x18000000) == 0x18000000 &&  // AVX and OSSave
-      TestOsSaveYmm()) {  // Saves YMM.
-    cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
-                 kCpuHasAVX;
-  }
-#endif
-  // Environment variable overrides for testing.
-  if (TestEnv("LIBYUV_DISABLE_X86")) {
-    cpu_info_ &= ~kCpuHasX86;
-  }
-  if (TestEnv("LIBYUV_DISABLE_SSE2")) {
-    cpu_info_ &= ~kCpuHasSSE2;
-  }
-  if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
-    cpu_info_ &= ~kCpuHasSSSE3;
-  }
-  if (TestEnv("LIBYUV_DISABLE_SSE41")) {
-    cpu_info_ &= ~kCpuHasSSE41;
-  }
-  if (TestEnv("LIBYUV_DISABLE_SSE42")) {
-    cpu_info_ &= ~kCpuHasSSE42;
-  }
-  if (TestEnv("LIBYUV_DISABLE_AVX")) {
-    cpu_info_ &= ~kCpuHasAVX;
-  }
-  if (TestEnv("LIBYUV_DISABLE_AVX2")) {
-    cpu_info_ &= ~kCpuHasAVX2;
-  }
-  if (TestEnv("LIBYUV_DISABLE_ERMS")) {
-    cpu_info_ &= ~kCpuHasERMS;
-  }
-  if (TestEnv("LIBYUV_DISABLE_FMA3")) {
-    cpu_info_ &= ~kCpuHasFMA3;
-  }
-#elif defined(__mips__) && defined(__linux__)
-  // Linux mips parse text file for dsp detect.
-  cpu_info_ = MipsCpuCaps("dsp");  // set kCpuHasMIPS_DSP.
-#if defined(__mips_dspr2)
-  cpu_info_ |= kCpuHasMIPS_DSPR2;
-#endif
-  cpu_info_ |= kCpuHasMIPS;
-
-  if (getenv("LIBYUV_DISABLE_MIPS")) {
-    cpu_info_ &= ~kCpuHasMIPS;
-  }
-  if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
-    cpu_info_ &= ~kCpuHasMIPS_DSP;
-  }
-  if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
-    cpu_info_ &= ~kCpuHasMIPS_DSPR2;
-  }
-#elif defined(__arm__)
-// gcc -mfpu=neon defines __ARM_NEON__
-// __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
-// For Linux, /proc/cpuinfo can be tested but without that assume Neon.
-#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
-#ifdef _ANDROID
-  cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); // libtheoraplayer #ifdef addition, just in case, android gave us troubles
-#else
-  cpu_info_ = kCpuHasNEON;
-#endif
-#else
-  // Linux arm parse text file for neon detect.
-  cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
-#endif
-  cpu_info_ |= kCpuHasARM;
-  if (TestEnv("LIBYUV_DISABLE_NEON")) {
-    cpu_info_ &= ~kCpuHasNEON;
-  }
-#ifdef _ANDROID
-  // libtheoraplayer addition to disable NEON support on android devices that don't support it, once again, just in case	
-  if ((android_getCpuFeaturesExt() & ANDROID_CPU_ARM_FEATURE_NEON) == 0)
-  {
- 	cpu_info_ = kCpuHasARM;
-  }
-#endif
-#endif  // __arm__
-  if (TestEnv("LIBYUV_DISABLE_ASM")) {
-    cpu_info_ = 0;
-  }
-  return cpu_info_;
-}
-
-LIBYUV_API
-void MaskCpuFlags(int enable_flags) {
-  cpu_info_ = InitCpuFlags() & enable_flags;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc b/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc
deleted file mode 100755
index a3daf96a98..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/format_conversion.h"
-
-#include "libyuv/basic_types.h"
-#include "libyuv/cpu_id.h"
-#include "libyuv/video_common.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// generate a selector mask useful for pshufb
-static uint32 GenerateSelector(int select0, int select1) {
-  return (uint32)(select0) |
-         (uint32)((select1 + 4) << 8) |
-         (uint32)((select0 + 8) << 16) |
-         (uint32)((select1 + 12) << 24);
-}
-
-static int MakeSelectors(const int blue_index,
-                         const int green_index,
-                         const int red_index,
-                         uint32 dst_fourcc_bayer,
-                         uint32* index_map) {
-  // Now build a lookup table containing the indices for the four pixels in each
-  // 2x2 Bayer grid.
-  switch (dst_fourcc_bayer) {
-    case FOURCC_BGGR:
-      index_map[0] = GenerateSelector(blue_index, green_index);
-      index_map[1] = GenerateSelector(green_index, red_index);
-      break;
-    case FOURCC_GBRG:
-      index_map[0] = GenerateSelector(green_index, blue_index);
-      index_map[1] = GenerateSelector(red_index, green_index);
-      break;
-    case FOURCC_RGGB:
-      index_map[0] = GenerateSelector(red_index, green_index);
-      index_map[1] = GenerateSelector(green_index, blue_index);
-      break;
-    case FOURCC_GRBG:
-      index_map[0] = GenerateSelector(green_index, red_index);
-      index_map[1] = GenerateSelector(blue_index, green_index);
-      break;
-    default:
-      return -1;  // Bad FourCC
-  }
-  return 0;
-}
-
-// Converts 32 bit ARGB to Bayer RGB formats.
-LIBYUV_API
-int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
-                uint8* dst_bayer, int dst_stride_bayer,
-                int width, int height,
-                uint32 dst_fourcc_bayer) {
-  int y;
-  const int blue_index = 0;  // Offsets for ARGB format
-  const int green_index = 1;
-  const int red_index = 2;
-  uint32 index_map[2];
-  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
-                         uint32 selector, int pix) = ARGBToBayerRow_C;
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
-    }
-  }
-#elif defined(HAS_ARGBTOBAYERROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerRow_NEON;
-    }
-  }
-#endif
-  if (MakeSelectors(blue_index, green_index, red_index,
-                    dst_fourcc_bayer, index_map)) {
-    return -1;  // Bad FourCC
-  }
-
-  for (y = 0; y < height; ++y) {
-    ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width);
-    src_argb += src_stride_argb;
-    dst_bayer += dst_stride_bayer;
-  }
-  return 0;
-}
-
-#define AVG(a, b) (((a) + (b)) >> 1)
-
-static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
-                       uint8* dst_argb, int pix) {
-  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
-  uint8 g = src_bayer0[1];
-  uint8 r = src_bayer1[1];
-  int x;
-  for (x = 0; x < pix - 2; x += 2) {
-    dst_argb[0] = src_bayer0[0];
-    dst_argb[1] = AVG(g, src_bayer0[1]);
-    dst_argb[2] = AVG(r, src_bayer1[1]);
-    dst_argb[3] = 255U;
-    dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
-    dst_argb[5] = src_bayer0[1];
-    dst_argb[6] = src_bayer1[1];
-    dst_argb[7] = 255U;
-    g = src_bayer0[1];
-    r = src_bayer1[1];
-    src_bayer0 += 2;
-    src_bayer1 += 2;
-    dst_argb += 8;
-  }
-  dst_argb[0] = src_bayer0[0];
-  dst_argb[1] = AVG(g, src_bayer0[1]);
-  dst_argb[2] = AVG(r, src_bayer1[1]);
-  dst_argb[3] = 255U;
-  if (!(pix & 1)) {
-    dst_argb[4] = src_bayer0[0];
-    dst_argb[5] = src_bayer0[1];
-    dst_argb[6] = src_bayer1[1];
-    dst_argb[7] = 255U;
-  }
-}
-
-static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
-                       uint8* dst_argb, int pix) {
-  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
-  uint8 g = src_bayer0[1];
-  uint8 b = src_bayer1[1];
-  int x;
-  for (x = 0; x < pix - 2; x += 2) {
-    dst_argb[0] = AVG(b, src_bayer1[1]);
-    dst_argb[1] = AVG(g, src_bayer0[1]);
-    dst_argb[2] = src_bayer0[0];
-    dst_argb[3] = 255U;
-    dst_argb[4] = src_bayer1[1];
-    dst_argb[5] = src_bayer0[1];
-    dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
-    dst_argb[7] = 255U;
-    g = src_bayer0[1];
-    b = src_bayer1[1];
-    src_bayer0 += 2;
-    src_bayer1 += 2;
-    dst_argb += 8;
-  }
-  dst_argb[0] = AVG(b, src_bayer1[1]);
-  dst_argb[1] = AVG(g, src_bayer0[1]);
-  dst_argb[2] = src_bayer0[0];
-  dst_argb[3] = 255U;
-  if (!(pix & 1)) {
-    dst_argb[4] = src_bayer1[1];
-    dst_argb[5] = src_bayer0[1];
-    dst_argb[6] = src_bayer0[0];
-    dst_argb[7] = 255U;
-  }
-}
-
-static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
-                       uint8* dst_argb, int pix) {
-  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
-  uint8 b = src_bayer0[1];
-  int x;
-  for (x = 0; x < pix - 2; x += 2) {
-    dst_argb[0] = AVG(b, src_bayer0[1]);
-    dst_argb[1] = src_bayer0[0];
-    dst_argb[2] = src_bayer1[0];
-    dst_argb[3] = 255U;
-    dst_argb[4] = src_bayer0[1];
-    dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
-    dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
-    dst_argb[7] = 255U;
-    b = src_bayer0[1];
-    src_bayer0 += 2;
-    src_bayer1 += 2;
-    dst_argb += 8;
-  }
-  dst_argb[0] = AVG(b, src_bayer0[1]);
-  dst_argb[1] = src_bayer0[0];
-  dst_argb[2] = src_bayer1[0];
-  dst_argb[3] = 255U;
-  if (!(pix & 1)) {
-    dst_argb[4] = src_bayer0[1];
-    dst_argb[5] = src_bayer0[0];
-    dst_argb[6] = src_bayer1[0];
-    dst_argb[7] = 255U;
-  }
-}
-
-static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
-                       uint8* dst_argb, int pix) {
-  const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
-  uint8 r = src_bayer0[1];
-  int x;
-  for (x = 0; x < pix - 2; x += 2) {
-    dst_argb[0] = src_bayer1[0];
-    dst_argb[1] = src_bayer0[0];
-    dst_argb[2] = AVG(r, src_bayer0[1]);
-    dst_argb[3] = 255U;
-    dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
-    dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
-    dst_argb[6] = src_bayer0[1];
-    dst_argb[7] = 255U;
-    r = src_bayer0[1];
-    src_bayer0 += 2;
-    src_bayer1 += 2;
-    dst_argb += 8;
-  }
-  dst_argb[0] = src_bayer1[0];
-  dst_argb[1] = src_bayer0[0];
-  dst_argb[2] = AVG(r, src_bayer0[1]);
-  dst_argb[3] = 255U;
-  if (!(pix & 1)) {
-    dst_argb[4] = src_bayer1[0];
-    dst_argb[5] = src_bayer0[0];
-    dst_argb[6] = src_bayer0[1];
-    dst_argb[7] = 255U;
-  }
-}
-
-// Converts any Bayer RGB format to ARGB.
-LIBYUV_API
-int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
-                uint8* dst_argb, int dst_stride_argb,
-                int width, int height,
-                uint32 src_fourcc_bayer) {
-  int y;
-  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int pix);
-  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int pix);
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  switch (src_fourcc_bayer) {
-    case FOURCC_BGGR:
-      BayerRow0 = BayerRowBG;
-      BayerRow1 = BayerRowGR;
-      break;
-    case FOURCC_GBRG:
-      BayerRow0 = BayerRowGB;
-      BayerRow1 = BayerRowRG;
-      break;
-    case FOURCC_GRBG:
-      BayerRow0 = BayerRowGR;
-      BayerRow1 = BayerRowBG;
-      break;
-    case FOURCC_RGGB:
-      BayerRow0 = BayerRowRG;
-      BayerRow1 = BayerRowGB;
-      break;
-    default:
-      return -1;    // Bad FourCC
-  }
-
-  for (y = 0; y < height - 1; y += 2) {
-    BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
-    BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
-              dst_argb + dst_stride_argb, width);
-    src_bayer += src_stride_bayer * 2;
-    dst_argb += dst_stride_argb * 2;
-  }
-  if (height & 1) {
-    BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
-  }
-  return 0;
-}
-
-// Converts any Bayer RGB format to ARGB.
-LIBYUV_API
-int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
-                uint8* dst_y, int dst_stride_y,
-                uint8* dst_u, int dst_stride_u,
-                uint8* dst_v, int dst_stride_v,
-                int width, int height,
-                uint32 src_fourcc_bayer) {
-  void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int pix);
-  void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
-                    uint8* dst_argb, int pix);
-
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
-      ARGBToYRow_C;
-  // Negative height means invert the image.
-  if (height < 0) {
-    int halfheight;
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
-    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
-    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
-    dst_stride_y = -dst_stride_y;
-    dst_stride_u = -dst_stride_u;
-    dst_stride_v = -dst_stride_v;
-  }
-#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    ARGBToYRow = ARGBToYRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-        ARGBToYRow = ARGBToYRow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-    if (width >= 16) {
-      ARGBToUVRow = ARGBToUVRow_Any_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGBToUVRow = ARGBToUVRow_NEON;
-      }
-    }
-  }
-#endif
-
-  switch (src_fourcc_bayer) {
-    case FOURCC_BGGR:
-      BayerRow0 = BayerRowBG;
-      BayerRow1 = BayerRowGR;
-      break;
-    case FOURCC_GBRG:
-      BayerRow0 = BayerRowGB;
-      BayerRow1 = BayerRowRG;
-      break;
-    case FOURCC_GRBG:
-      BayerRow0 = BayerRowGR;
-      BayerRow1 = BayerRowBG;
-      break;
-    case FOURCC_RGGB:
-      BayerRow0 = BayerRowRG;
-      BayerRow1 = BayerRowGB;
-      break;
-    default:
-      return -1;  // Bad FourCC
-  }
-
-  {
-    // Allocate 2 rows of ARGB.
-    const int kRowSize = (width * 4 + 15) & ~15;
-    align_buffer_64(row, kRowSize * 2);
-    int y;
-    for (y = 0; y < height - 1; y += 2) {
-      BayerRow0(src_bayer, src_stride_bayer, row, width);
-      BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
-                row + kRowSize, width);
-      ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
-      ARGBToYRow(row, dst_y, width);
-      ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
-      src_bayer += src_stride_bayer * 2;
-      dst_y += dst_stride_y * 2;
-      dst_u += dst_stride_u;
-      dst_v += dst_stride_v;
-    }
-    if (height & 1) {
-      BayerRow0(src_bayer, src_stride_bayer, row, width);
-      ARGBToUVRow(row, 0, dst_u, dst_v, width);
-      ARGBToYRow(row, dst_y, width);
-    }
-    free_aligned_buffer_64(row);
-  }
-  return 0;
-}
-
-// Convert I420 to Bayer.
-LIBYUV_API
-int I420ToBayer(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_bayer, int dst_stride_bayer,
-                int width, int height,
-                uint32 dst_fourcc_bayer) {
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToARGBRow_C;
-  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
-                         uint32 selector, int pix) = ARGBToBayerRow_C;
-  const int blue_index = 0;  // Offsets for ARGB format
-  const int green_index = 1;
-  const int red_index = 2;
-  uint32 index_map[2];
-  // Negative height means invert the image.
-  if (height < 0) {
-    int halfheight;
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
-    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
-    }
-  }
-#elif defined(HAS_ARGBTOBAYERROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerRow_NEON;
-    }
-  }
-#endif
-
-  if (MakeSelectors(blue_index, green_index, red_index,
-                    dst_fourcc_bayer, index_map)) {
-    return -1;  // Bad FourCC
-  }
-  {
-    // Allocate a row of ARGB.
-    align_buffer_64(row, width * 4);
-    int y;
-    for (y = 0; y < height; ++y) {
-      I422ToARGBRow(src_y, src_u, src_v, row, width);
-      ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
-      dst_bayer += dst_stride_bayer;
-      src_y += src_stride_y;
-      if (y & 1) {
-        src_u += src_stride_u;
-        src_v += src_stride_v;
-      }
-    }
-    free_aligned_buffer_64(row);
-  }
-  return 0;
-}
-
-#define MAKEBAYERFOURCC(BAYER)                                                 \
-LIBYUV_API                                                                     \
-int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer,         \
-                         uint8* dst_y, int dst_stride_y,                       \
-                         uint8* dst_u, int dst_stride_u,                       \
-                         uint8* dst_v, int dst_stride_v,                       \
-                         int width, int height) {                              \
-  return BayerToI420(src_bayer, src_stride_bayer,                              \
-                     dst_y, dst_stride_y,                                      \
-                     dst_u, dst_stride_u,                                      \
-                     dst_v, dst_stride_v,                                      \
-                     width, height,                                            \
-                     FOURCC_##BAYER);                                          \
-}                                                                              \
-                                                                               \
-LIBYUV_API                                                                     \
-int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y,                   \
-                       const uint8* src_u, int src_stride_u,                   \
-                       const uint8* src_v, int src_stride_v,                   \
-                       uint8* dst_bayer, int dst_stride_bayer,                 \
-                       int width, int height) {                                \
-  return I420ToBayer(src_y, src_stride_y,                                      \
-                     src_u, src_stride_u,                                      \
-                     src_v, src_stride_v,                                      \
-                     dst_bayer, dst_stride_bayer,                              \
-                     width, height,                                            \
-                     FOURCC_##BAYER);                                          \
-}                                                                              \
-                                                                               \
-LIBYUV_API                                                                     \
-int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb,             \
-                       uint8* dst_bayer, int dst_stride_bayer,                 \
-                       int width, int height) {                                \
-  return ARGBToBayer(src_argb, src_stride_argb,                                \
-                     dst_bayer, dst_stride_bayer,                              \
-                     width, height,                                            \
-                     FOURCC_##BAYER);                                          \
-}                                                                              \
-                                                                               \
-LIBYUV_API                                                                     \
-int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer,         \
-                         uint8* dst_argb, int dst_stride_argb,                 \
-                         int width, int height) {                              \
-  return BayerToARGB(src_bayer, src_stride_bayer,                              \
-                     dst_argb, dst_stride_argb,                                \
-                     width, height,                                            \
-                     FOURCC_##BAYER);                                          \
-}
-
-MAKEBAYERFOURCC(BGGR)
-MAKEBAYERFOURCC(GBRG)
-MAKEBAYERFOURCC(GRBG)
-MAKEBAYERFOURCC(RGGB)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc b/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc
deleted file mode 100755
index 193b829ba9..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/mjpeg_decoder.h"
-
-#ifdef HAVE_JPEG
-#include <assert.h>
-
-#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
-    !defined(TARGET_IPHONE_SIMULATOR)
-// Must be included before jpeglib.
-#include <setjmp.h>
-#define HAVE_SETJMP
-#endif
-struct FILE;  // For jpeglib.h.
-
-// C++ build requires extern C for jpeg internals.
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <jpeglib.h>
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#include "libyuv/planar_functions.h"  // For CopyPlane().
-
-namespace libyuv {
-
-#ifdef HAVE_SETJMP
-struct SetJmpErrorMgr {
-  jpeg_error_mgr base;  // Must be at the top
-  jmp_buf setjmp_buffer;
-};
-#endif
-
-const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
-const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
-const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
-const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
-const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
-const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
-
-MJpegDecoder::MJpegDecoder()
-    : has_scanline_padding_(LIBYUV_FALSE),
-      num_outbufs_(0),
-      scanlines_(NULL),
-      scanlines_sizes_(NULL),
-      databuf_(NULL),
-      databuf_strides_(NULL) {
-  decompress_struct_ = new jpeg_decompress_struct;
-  source_mgr_ = new jpeg_source_mgr;
-#ifdef HAVE_SETJMP
-  error_mgr_ = new SetJmpErrorMgr;
-  decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
-  // Override standard exit()-based error handler.
-  error_mgr_->base.error_exit = &ErrorHandler;
-#endif
-  decompress_struct_->client_data = NULL;
-  source_mgr_->init_source = &init_source;
-  source_mgr_->fill_input_buffer = &fill_input_buffer;
-  source_mgr_->skip_input_data = &skip_input_data;
-  source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
-  source_mgr_->term_source = &term_source;
-  jpeg_create_decompress(decompress_struct_);
-  decompress_struct_->src = source_mgr_;
-  buf_vec_.buffers = &buf_;
-  buf_vec_.len = 1;
-}
-
-MJpegDecoder::~MJpegDecoder() {
-  jpeg_destroy_decompress(decompress_struct_);
-  delete decompress_struct_;
-  delete source_mgr_;
-#ifdef HAVE_SETJMP
-  delete error_mgr_;
-#endif
-  DestroyOutputBuffers();
-}
-
-LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
-  if (!ValidateJpeg(src, src_len)) {
-    return LIBYUV_FALSE;
-  }
-
-  buf_.data = src;
-  buf_.len = (int)(src_len);
-  buf_vec_.pos = 0;
-  decompress_struct_->client_data = &buf_vec_;
-#ifdef HAVE_SETJMP
-  if (setjmp(error_mgr_->setjmp_buffer)) {
-    // We called jpeg_read_header, it experienced an error, and we called
-    // longjmp() and rewound the stack to here. Return error.
-    return LIBYUV_FALSE;
-  }
-#endif
-  if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
-    // ERROR: Bad MJPEG header
-    return LIBYUV_FALSE;
-  }
-  AllocOutputBuffers(GetNumComponents());
-  for (int i = 0; i < num_outbufs_; ++i) {
-    int scanlines_size = GetComponentScanlinesPerImcuRow(i);
-    if (scanlines_sizes_[i] != scanlines_size) {
-      if (scanlines_[i]) {
-        delete scanlines_[i];
-      }
-      scanlines_[i] = new uint8* [scanlines_size];
-      scanlines_sizes_[i] = scanlines_size;
-    }
-
-    // We allocate padding for the final scanline to pad it up to DCTSIZE bytes
-    // to avoid memory errors, since jpeglib only reads full MCUs blocks. For
-    // the preceding scanlines, the padding is not needed/wanted because the
-    // following addresses will already be valid (they are the initial bytes of
-    // the next scanline) and will be overwritten when jpeglib writes out that
-    // next scanline.
-    int databuf_stride = GetComponentStride(i);
-    int databuf_size = scanlines_size * databuf_stride;
-    if (databuf_strides_[i] != databuf_stride) {
-      if (databuf_[i]) {
-        delete databuf_[i];
-      }
-      databuf_[i] = new uint8[databuf_size];
-      databuf_strides_[i] = databuf_stride;
-    }
-
-    if (GetComponentStride(i) != GetComponentWidth(i)) {
-      has_scanline_padding_ = LIBYUV_TRUE;
-    }
-  }
-  return LIBYUV_TRUE;
-}
-
-static int DivideAndRoundUp(int numerator, int denominator) {
-  return (numerator + denominator - 1) / denominator;
-}
-
-static int DivideAndRoundDown(int numerator, int denominator) {
-  return numerator / denominator;
-}
-
-// Returns width of the last loaded frame.
-int MJpegDecoder::GetWidth() {
-  return decompress_struct_->image_width;
-}
-
-// Returns height of the last loaded frame.
-int MJpegDecoder::GetHeight() {
-  return decompress_struct_->image_height;
-}
-
-// Returns format of the last loaded frame. The return value is one of the
-// kColorSpace* constants.
-int MJpegDecoder::GetColorSpace() {
-  return decompress_struct_->jpeg_color_space;
-}
-
-// Number of color components in the color space.
-int MJpegDecoder::GetNumComponents() {
-  return decompress_struct_->num_components;
-}
-
-// Sample factors of the n-th component.
-int MJpegDecoder::GetHorizSampFactor(int component) {
-  return decompress_struct_->comp_info[component].h_samp_factor;
-}
-
-int MJpegDecoder::GetVertSampFactor(int component) {
-  return decompress_struct_->comp_info[component].v_samp_factor;
-}
-
-int MJpegDecoder::GetHorizSubSampFactor(int component) {
-  return decompress_struct_->max_h_samp_factor /
-      GetHorizSampFactor(component);
-}
-
-int MJpegDecoder::GetVertSubSampFactor(int component) {
-  return decompress_struct_->max_v_samp_factor /
-      GetVertSampFactor(component);
-}
-
-int MJpegDecoder::GetImageScanlinesPerImcuRow() {
-  return decompress_struct_->max_v_samp_factor * DCTSIZE;
-}
-
-int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
-  int vs = GetVertSubSampFactor(component);
-  return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
-}
-
-int MJpegDecoder::GetComponentWidth(int component) {
-  int hs = GetHorizSubSampFactor(component);
-  return DivideAndRoundUp(GetWidth(), hs);
-}
-
-int MJpegDecoder::GetComponentHeight(int component) {
-  int vs = GetVertSubSampFactor(component);
-  return DivideAndRoundUp(GetHeight(), vs);
-}
-
-// Get width in bytes padded out to a multiple of DCTSIZE
-int MJpegDecoder::GetComponentStride(int component) {
-  return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
-}
-
-int MJpegDecoder::GetComponentSize(int component) {
-  return GetComponentWidth(component) * GetComponentHeight(component);
-}
-
-LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
-#ifdef HAVE_SETJMP
-  if (setjmp(error_mgr_->setjmp_buffer)) {
-    // We called jpeg_abort_decompress, it experienced an error, and we called
-    // longjmp() and rewound the stack to here. Return error.
-    return LIBYUV_FALSE;
-  }
-#endif
-  jpeg_abort_decompress(decompress_struct_);
-  return LIBYUV_TRUE;
-}
-
-// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
-LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
-    uint8** planes, int dst_width, int dst_height) {
-  if (dst_width != GetWidth() ||
-      dst_height > GetHeight()) {
-    // ERROR: Bad dimensions
-    return LIBYUV_FALSE;
-  }
-#ifdef HAVE_SETJMP
-  if (setjmp(error_mgr_->setjmp_buffer)) {
-    // We called into jpeglib, it experienced an error sometime during this
-    // function call, and we called longjmp() and rewound the stack to here.
-    // Return error.
-    return LIBYUV_FALSE;
-  }
-#endif
-  if (!StartDecode()) {
-    return LIBYUV_FALSE;
-  }
-  SetScanlinePointers(databuf_);
-  int lines_left = dst_height;
-  // Compute amount of lines to skip to implement vertical crop.
-  // TODO(fbarchard): Ensure skip is a multiple of maximum component
-  // subsample. ie 2
-  int skip = (GetHeight() - dst_height) / 2;
-  if (skip > 0) {
-    // There is no API to skip lines in the output data, so we read them
-    // into the temp buffer.
-    while (skip >= GetImageScanlinesPerImcuRow()) {
-      if (!DecodeImcuRow()) {
-        FinishDecode();
-        return LIBYUV_FALSE;
-      }
-      skip -= GetImageScanlinesPerImcuRow();
-    }
-    if (skip > 0) {
-      // Have a partial iMCU row left over to skip. Must read it and then
-      // copy the parts we want into the destination.
-      if (!DecodeImcuRow()) {
-        FinishDecode();
-        return LIBYUV_FALSE;
-      }
-      for (int i = 0; i < num_outbufs_; ++i) {
-        // TODO(fbarchard): Compute skip to avoid this
-        assert(skip % GetVertSubSampFactor(i) == 0);
-        int rows_to_skip =
-            DivideAndRoundDown(skip, GetVertSubSampFactor(i));
-        int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
-                                rows_to_skip;
-        int data_to_skip = rows_to_skip * GetComponentStride(i);
-        CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
-                  planes[i], GetComponentWidth(i),
-                  GetComponentWidth(i), scanlines_to_copy);
-        planes[i] += scanlines_to_copy * GetComponentWidth(i);
-      }
-      lines_left -= (GetImageScanlinesPerImcuRow() - skip);
-    }
-  }
-
-  // Read full MCUs but cropped horizontally
-  for (; lines_left > GetImageScanlinesPerImcuRow();
-         lines_left -= GetImageScanlinesPerImcuRow()) {
-    if (!DecodeImcuRow()) {
-      FinishDecode();
-      return LIBYUV_FALSE;
-    }
-    for (int i = 0; i < num_outbufs_; ++i) {
-      int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
-      CopyPlane(databuf_[i], GetComponentStride(i),
-                planes[i], GetComponentWidth(i),
-                GetComponentWidth(i), scanlines_to_copy);
-      planes[i] += scanlines_to_copy * GetComponentWidth(i);
-    }
-  }
-
-  if (lines_left > 0) {
-    // Have a partial iMCU row left over to decode.
-    if (!DecodeImcuRow()) {
-      FinishDecode();
-      return LIBYUV_FALSE;
-    }
-    for (int i = 0; i < num_outbufs_; ++i) {
-      int scanlines_to_copy =
-          DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
-      CopyPlane(databuf_[i], GetComponentStride(i),
-                planes[i], GetComponentWidth(i),
-                GetComponentWidth(i), scanlines_to_copy);
-      planes[i] += scanlines_to_copy * GetComponentWidth(i);
-    }
-  }
-  return FinishDecode();
-}
-
-LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
-    int dst_width, int dst_height) {
-  if (dst_width != GetWidth() ||
-      dst_height > GetHeight()) {
-    // ERROR: Bad dimensions
-    return LIBYUV_FALSE;
-  }
-#ifdef HAVE_SETJMP
-  if (setjmp(error_mgr_->setjmp_buffer)) {
-    // We called into jpeglib, it experienced an error sometime during this
-    // function call, and we called longjmp() and rewound the stack to here.
-    // Return error.
-    return LIBYUV_FALSE;
-  }
-#endif
-  if (!StartDecode()) {
-    return LIBYUV_FALSE;
-  }
-  SetScanlinePointers(databuf_);
-  int lines_left = dst_height;
-  // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
-  int skip = (GetHeight() - dst_height) / 2;
-  if (skip > 0) {
-    while (skip >= GetImageScanlinesPerImcuRow()) {
-      if (!DecodeImcuRow()) {
-        FinishDecode();
-        return LIBYUV_FALSE;
-      }
-      skip -= GetImageScanlinesPerImcuRow();
-    }
-    if (skip > 0) {
-      // Have a partial iMCU row left over to skip.
-      if (!DecodeImcuRow()) {
-        FinishDecode();
-        return LIBYUV_FALSE;
-      }
-      for (int i = 0; i < num_outbufs_; ++i) {
-        // TODO(fbarchard): Compute skip to avoid this
-        assert(skip % GetVertSubSampFactor(i) == 0);
-        int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
-        int data_to_skip = rows_to_skip * GetComponentStride(i);
-        // Change our own data buffer pointers so we can pass them to the
-        // callback.
-        databuf_[i] += data_to_skip;
-      }
-      int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
-      (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
-      // Now change them back.
-      for (int i = 0; i < num_outbufs_; ++i) {
-        int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
-        int data_to_skip = rows_to_skip * GetComponentStride(i);
-        databuf_[i] -= data_to_skip;
-      }
-      lines_left -= scanlines_to_copy;
-    }
-  }
-  // Read full MCUs until we get to the crop point.
-  for (; lines_left >= GetImageScanlinesPerImcuRow();
-         lines_left -= GetImageScanlinesPerImcuRow()) {
-    if (!DecodeImcuRow()) {
-      FinishDecode();
-      return LIBYUV_FALSE;
-    }
-    (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
-  }
-  if (lines_left > 0) {
-    // Have a partial iMCU row left over to decode.
-    if (!DecodeImcuRow()) {
-      FinishDecode();
-      return LIBYUV_FALSE;
-    }
-    (*fn)(opaque, databuf_, databuf_strides_, lines_left);
-  }
-  return FinishDecode();
-}
-
-void MJpegDecoder::init_source(j_decompress_ptr cinfo) {
-  fill_input_buffer(cinfo);
-}
-
-boolean MJpegDecoder::fill_input_buffer(j_decompress_ptr cinfo) {
-  BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
-  if (buf_vec->pos >= buf_vec->len) {
-    assert(0 && "No more data");
-    // ERROR: No more data
-    return FALSE;
-  }
-  cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
-  cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
-  ++buf_vec->pos;
-  return TRUE;
-}
-
-void MJpegDecoder::skip_input_data(j_decompress_ptr cinfo,
-                                   long num_bytes) {  // NOLINT
-  cinfo->src->next_input_byte += num_bytes;
-}
-
-void MJpegDecoder::term_source(j_decompress_ptr cinfo) {
-  // Nothing to do.
-}
-
-#ifdef HAVE_SETJMP
-void MJpegDecoder::ErrorHandler(j_common_ptr cinfo) {
-  // This is called when a jpeglib command experiences an error. Unfortunately
-  // jpeglib's error handling model is not very flexible, because it expects the
-  // error handler to not return--i.e., it wants the program to terminate. To
-  // recover from errors we use setjmp() as shown in their example. setjmp() is
-  // C's implementation for the "call with current continuation" functionality
-  // seen in some functional programming languages.
-  // A formatted message can be output, but is unsafe for release.
-#ifdef DEBUG
-  char buf[JMSG_LENGTH_MAX];
-  (*cinfo->err->format_message)(cinfo, buf);
-  // ERROR: Error in jpeglib: buf
-#endif
-
-  SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
-  // This rewinds the call stack to the point of the corresponding setjmp()
-  // and causes it to return (for a second time) with value 1.
-  longjmp(mgr->setjmp_buffer, 1);
-}
-#endif
-
-void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
-  if (num_outbufs != num_outbufs_) {
-    // We could perhaps optimize this case to resize the output buffers without
-    // necessarily having to delete and recreate each one, but it's not worth
-    // it.
-    DestroyOutputBuffers();
-
-    scanlines_ = new uint8** [num_outbufs];
-    scanlines_sizes_ = new int[num_outbufs];
-    databuf_ = new uint8* [num_outbufs];
-    databuf_strides_ = new int[num_outbufs];
-
-    for (int i = 0; i < num_outbufs; ++i) {
-      scanlines_[i] = NULL;
-      scanlines_sizes_[i] = 0;
-      databuf_[i] = NULL;
-      databuf_strides_[i] = 0;
-    }
-
-    num_outbufs_ = num_outbufs;
-  }
-}
-
-void MJpegDecoder::DestroyOutputBuffers() {
-  for (int i = 0; i < num_outbufs_; ++i) {
-    delete [] scanlines_[i];
-    delete [] databuf_[i];
-  }
-  delete [] scanlines_;
-  delete [] databuf_;
-  delete [] scanlines_sizes_;
-  delete [] databuf_strides_;
-  scanlines_ = NULL;
-  databuf_ = NULL;
-  scanlines_sizes_ = NULL;
-  databuf_strides_ = NULL;
-  num_outbufs_ = 0;
-}
-
-// JDCT_IFAST and do_block_smoothing improve performance substantially.
-LIBYUV_BOOL MJpegDecoder::StartDecode() {
-  decompress_struct_->raw_data_out = TRUE;
-  decompress_struct_->dct_method = JDCT_IFAST;  // JDCT_ISLOW is default
-  decompress_struct_->dither_mode = JDITHER_NONE;
-  // Not applicable to 'raw':
-  decompress_struct_->do_fancy_upsampling = LIBYUV_FALSE;
-  // Only for buffered mode:
-  decompress_struct_->enable_2pass_quant = LIBYUV_FALSE;
-  // Blocky but fast:
-  decompress_struct_->do_block_smoothing = LIBYUV_FALSE;
-
-  if (!jpeg_start_decompress(decompress_struct_)) {
-    // ERROR: Couldn't start JPEG decompressor";
-    return LIBYUV_FALSE;
-  }
-  return LIBYUV_TRUE;
-}
-
-LIBYUV_BOOL MJpegDecoder::FinishDecode() {
-  // jpeglib considers it an error if we finish without decoding the whole
-  // image, so we call "abort" rather than "finish".
-  jpeg_abort_decompress(decompress_struct_);
-  return LIBYUV_TRUE;
-}
-
-void MJpegDecoder::SetScanlinePointers(uint8** data) {
-  for (int i = 0; i < num_outbufs_; ++i) {
-    uint8* data_i = data[i];
-    for (int j = 0; j < scanlines_sizes_[i]; ++j) {
-      scanlines_[i][j] = data_i;
-      data_i += GetComponentStride(i);
-    }
-  }
-}
-
-inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
-  return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
-      jpeg_read_raw_data(decompress_struct_,
-                         scanlines_,
-                         GetImageScanlinesPerImcuRow());
-}
-
-// The helper function which recognizes the jpeg sub-sampling type.
-JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
-    int* subsample_x, int* subsample_y, int number_of_components) {
-  if (number_of_components == 3) {  // Color images.
-    if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
-        subsample_x[1] == 2 && subsample_y[1] == 2 &&
-        subsample_x[2] == 2 && subsample_y[2] == 2) {
-      return kJpegYuv420;
-    } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
-        subsample_x[1] == 2 && subsample_y[1] == 1 &&
-        subsample_x[2] == 2 && subsample_y[2] == 1) {
-      return kJpegYuv422;
-    } else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
-        subsample_x[1] == 1 && subsample_y[1] == 1 &&
-        subsample_x[2] == 1 && subsample_y[2] == 1) {
-      return kJpegYuv444;
-    }
-  } else if (number_of_components == 1) {  // Grey-scale images.
-    if (subsample_x[0] == 1 && subsample_y[0] == 1) {
-      return kJpegYuv400;
-    }
-  }
-  return kJpegUnknown;
-}
-
-}  // namespace libyuv
-#endif  // HAVE_JPEG
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc b/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc
deleted file mode 100755
index 23d22d099b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/mjpeg_decoder.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Helper function to validate the jpeg appears intact.
-// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
-  size_t i;
-  if (sample_size < 64) {
-    // ERROR: Invalid jpeg size: sample_size
-    return LIBYUV_FALSE;
-  }
-  if (sample[0] != 0xff || sample[1] != 0xd8) {  // Start Of Image
-    // ERROR: Invalid jpeg initial start code
-    return LIBYUV_FALSE;
-  }
-  for (i = sample_size - 2; i > 1;) {
-    if (sample[i] != 0xd9) {
-      if (sample[i] == 0xff && sample[i + 1] == 0xd9) {  // End Of Image
-        return LIBYUV_TRUE;  // Success: Valid jpeg.
-      }
-      --i;
-    }
-    --i;
-  }
-  // ERROR: Invalid jpeg end code not found. Size sample_size
-  return LIBYUV_FALSE;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc b/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc
deleted file mode 100755
index f0a8989051..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc
+++ /dev/null
@@ -1,2238 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/planar_functions.h"
-
-#include <string.h>  // for memset()
-
-#include "libyuv/cpu_id.h"
-#ifdef HAVE_JPEG
-#include "libyuv/mjpeg_decoder.h"
-#endif
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Copy a plane of data
-LIBYUV_API
-void CopyPlane(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height) {
-  int y;
-  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_y = 0;
-  }
-#if defined(HAS_COPYROW_X86)
-  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
-    CopyRow = CopyRow_X86;
-  }
-#endif
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-    CopyRow = CopyRow_SSE2;
-  }
-#endif
-#if defined(HAS_COPYROW_ERMS)
-  if (TestCpuFlag(kCpuHasERMS)) {
-    CopyRow = CopyRow_ERMS;
-  }
-#endif
-#if defined(HAS_COPYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
-    CopyRow = CopyRow_NEON;
-  }
-#endif
-#if defined(HAS_COPYROW_MIPS)
-  if (TestCpuFlag(kCpuHasMIPS)) {
-    CopyRow = CopyRow_MIPS;
-  }
-#endif
-
-  // Copy plane
-  for (y = 0; y < height; ++y) {
-    CopyRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-  }
-}
-
-// Copy I422.
-LIBYUV_API
-int I422Copy(const uint8* src_y, int src_stride_y,
-             const uint8* src_u, int src_stride_u,
-             const uint8* src_v, int src_stride_v,
-             uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int width, int height) {
-  int halfwidth = (width + 1) >> 1;
-  if (!src_y || !src_u || !src_v ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (height - 1) * src_stride_u;
-    src_v = src_v + (height - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
-  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
-  return 0;
-}
-
-// Copy I444.
-LIBYUV_API
-int I444Copy(const uint8* src_y, int src_stride_y,
-             const uint8* src_u, int src_stride_u,
-             const uint8* src_v, int src_stride_v,
-             uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int width, int height) {
-  if (!src_y || !src_u || !src_v ||
-      !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (height - 1) * src_stride_u;
-    src_v = src_v + (height - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
-  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
-  return 0;
-}
-
-// Copy I400.
-LIBYUV_API
-int I400ToI400(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height) {
-  if (!src_y || !dst_y || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  return 0;
-}
-
-// Convert I420 to I400.
-LIBYUV_API
-int I420ToI400(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height) {
-  if (!src_y || !dst_y || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  return 0;
-}
-
-// Mirror a plane of data.
-void MirrorPlane(const uint8* src_y, int src_stride_y,
-                 uint8* dst_y, int dst_stride_y,
-                 int width, int height) {
-  int y;
-  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-#if defined(HAS_MIRRORROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    MirrorRow = MirrorRow_NEON;
-  }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
-    MirrorRow = MirrorRow_SSE2;
-  }
-#endif
-#if defined(HAS_MIRRORROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-    MirrorRow = MirrorRow_SSSE3;
-  }
-#endif
-#if defined(HAS_MIRRORROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
-    MirrorRow = MirrorRow_AVX2;
-  }
-#endif
-
-  // Mirror plane
-  for (y = 0; y < height; ++y) {
-    MirrorRow(src_y, dst_y, width);
-    src_y += src_stride_y;
-    dst_y += dst_stride_y;
-  }
-}
-
-// Convert YUY2 to I422.
-LIBYUV_API
-int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  int y;
-  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix) =
-      YUY2ToUV422Row_C;
-  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
-      YUY2ToYRow_C;
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
-    src_stride_yuy2 = -src_stride_yuy2;
-  }
-  // Coalesce rows.
-  if (src_stride_yuy2 == width * 2 &&
-      dst_stride_y == width &&
-      dst_stride_u * 2 == width &&
-      dst_stride_v * 2 == width) {
-    width *= height;
-    height = 1;
-    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-#if defined(HAS_YUY2TOYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
-    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
-      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
-        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          YUY2ToYRow = YUY2ToYRow_SSE2;
-        }
-      }
-    }
-  }
-#endif
-#if defined(HAS_YUY2TOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
-    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
-      YUY2ToYRow = YUY2ToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_YUY2TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    YUY2ToYRow = YUY2ToYRow_Any_NEON;
-    if (width >= 16) {
-      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
-    }
-    if (IS_ALIGNED(width, 16)) {
-      YUY2ToYRow = YUY2ToYRow_NEON;
-      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
-    YUY2ToYRow(src_yuy2, dst_y, width);
-    src_yuy2 += src_stride_yuy2;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-// Convert UYVY to I422.
-LIBYUV_API
-int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  int y;
-  void (*UYVYToUV422Row)(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix) =
-      UYVYToUV422Row_C;
-  void (*UYVYToYRow)(const uint8* src_uyvy,
-                     uint8* dst_y, int pix) = UYVYToYRow_C;
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
-    src_stride_uyvy = -src_stride_uyvy;
-  }
-  // Coalesce rows.
-  if (src_stride_uyvy == width * 2 &&
-      dst_stride_y == width &&
-      dst_stride_u * 2 == width &&
-      dst_stride_v * 2 == width) {
-    width *= height;
-    height = 1;
-    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
-  }
-#if defined(HAS_UYVYTOYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
-    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
-    UYVYToYRow = UYVYToYRow_Any_SSE2;
-    if (IS_ALIGNED(width, 16)) {
-      UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
-      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
-        UYVYToUV422Row = UYVYToUV422Row_SSE2;
-        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-          UYVYToYRow = UYVYToYRow_SSE2;
-        }
-      }
-    }
-  }
-#endif
-#if defined(HAS_UYVYTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
-    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
-    UYVYToYRow = UYVYToYRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      UYVYToUV422Row = UYVYToUV422Row_AVX2;
-      UYVYToYRow = UYVYToYRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_UYVYTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    UYVYToYRow = UYVYToYRow_Any_NEON;
-    if (width >= 16) {
-      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
-    }
-    if (IS_ALIGNED(width, 16)) {
-      UYVYToYRow = UYVYToYRow_NEON;
-      UYVYToUV422Row = UYVYToUV422Row_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
-    UYVYToYRow(src_uyvy, dst_y, width);
-    src_uyvy += src_stride_uyvy;
-    dst_y += dst_stride_y;
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-  }
-  return 0;
-}
-
-// Mirror I400 with optional flipping
-LIBYUV_API
-int I400Mirror(const uint8* src_y, int src_stride_y,
-               uint8* dst_y, int dst_stride_y,
-               int width, int height) {
-  if (!src_y || !dst_y ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-
-  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  return 0;
-}
-
-// Mirror I420 with optional flipping
-LIBYUV_API
-int I420Mirror(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height) {
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-
-  if (dst_y) {
-    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  }
-  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
-  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
-  return 0;
-}
-
-// ARGB mirror.
-LIBYUV_API
-int ARGBMirror(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int y;
-  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
-      ARGBMirrorRow_C;
-  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-
-#if defined(HAS_ARGBMIRRORROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
-  }
-#endif
-#if defined(HAS_ARGBMIRRORROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
-    ARGBMirrorRow = ARGBMirrorRow_AVX2;
-  }
-#endif
-#if defined(HAS_ARGBMIRRORROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
-    ARGBMirrorRow = ARGBMirrorRow_NEON;
-  }
-#endif
-
-  // Mirror plane
-  for (y = 0; y < height; ++y) {
-    ARGBMirrorRow(src_argb, dst_argb, width);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Get a blender that optimized for the CPU, alignment and pixel count.
-// As there are 6 blenders to choose from, the caller should try to use
-// the same blend function for all pixels if possible.
-LIBYUV_API
-ARGBBlendRow GetARGBBlend() {
-  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width) = ARGBBlendRow_C;
-#if defined(HAS_ARGBBLENDROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBBlendRow = ARGBBlendRow_SSSE3;
-    return ARGBBlendRow;
-  }
-#endif
-#if defined(HAS_ARGBBLENDROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    ARGBBlendRow = ARGBBlendRow_SSE2;
-  }
-#endif
-#if defined(HAS_ARGBBLENDROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBBlendRow = ARGBBlendRow_NEON;
-  }
-#endif
-  return ARGBBlendRow;
-}
-
-// Alpha Blend 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
-              const uint8* src_argb1, int src_stride_argb1,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height) {
-  int y;
-  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
-                       uint8* dst_argb, int width) = GetARGBBlend();
-  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
-  }
-
-  for (y = 0; y < height; ++y) {
-    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
-    src_argb0 += src_stride_argb0;
-    src_argb1 += src_stride_argb1;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Multiply 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
-                 const uint8* src_argb1, int src_stride_argb1,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height) {
-  int y;
-  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
-                          int width) = ARGBMultiplyRow_C;
-  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBMULTIPLYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
-    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBMULTIPLYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
-    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBMULTIPLYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
-    }
-  }
-#endif
-
-  // Multiply plane
-  for (y = 0; y < height; ++y) {
-    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
-    src_argb0 += src_stride_argb0;
-    src_argb1 += src_stride_argb1;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Add 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
-            const uint8* src_argb1, int src_stride_argb1,
-            uint8* dst_argb, int dst_stride_argb,
-            int width, int height) {
-  int y;
-  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
-                     int width) = ARGBAddRow_C;
-  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    ARGBAddRow = ARGBAddRow_SSE2;
-  }
-#endif
-#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
-    ARGBAddRow = ARGBAddRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBAddRow = ARGBAddRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBADDROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
-    ARGBAddRow = ARGBAddRow_Any_AVX2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBAddRow = ARGBAddRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBADDROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBAddRow = ARGBAddRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBAddRow = ARGBAddRow_NEON;
-    }
-  }
-#endif
-
-  // Add plane
-  for (y = 0; y < height; ++y) {
-    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
-    src_argb0 += src_stride_argb0;
-    src_argb1 += src_stride_argb1;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Subtract 2 ARGB images and store to destination.
-LIBYUV_API
-int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
-                 const uint8* src_argb1, int src_stride_argb1,
-                 uint8* dst_argb, int dst_stride_argb,
-                 int width, int height) {
-  int y;
-  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
-                          int width) = ARGBSubtractRow_C;
-  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBSUBTRACTROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
-    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBSubtractRow = ARGBSubtractRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBSUBTRACTROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
-    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBSubtractRow = ARGBSubtractRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBSUBTRACTROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBSubtractRow = ARGBSubtractRow_NEON;
-    }
-  }
-#endif
-
-  // Subtract plane
-  for (y = 0; y < height; ++y) {
-    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
-    src_argb0 += src_stride_argb0;
-    src_argb1 += src_stride_argb1;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert I422 to BGRA.
-LIBYUV_API
-int I422ToBGRA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_bgra, int dst_stride_bgra,
-               int width, int height) {
-  int y;
-  void (*I422ToBGRARow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToBGRARow_C;
-  if (!src_y || !src_u || !src_v ||
-      !dst_bgra ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
-    dst_stride_bgra = -dst_stride_bgra;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_bgra == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
-  }
-#if defined(HAS_I422TOBGRAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToBGRARow = I422ToBGRARow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToBGRARow = I422ToBGRARow_NEON;
-    }
-  }
-#elif defined(HAS_I422TOBGRAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
-        I422ToBGRARow = I422ToBGRARow_SSSE3;
-      }
-    }
-  }
-#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
-    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
-    dst_bgra += dst_stride_bgra;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
-// Convert I422 to ABGR.
-LIBYUV_API
-int I422ToABGR(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_abgr, int dst_stride_abgr,
-               int width, int height) {
-  int y;
-  void (*I422ToABGRRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToABGRRow_C;
-  if (!src_y || !src_u || !src_v ||
-      !dst_abgr ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
-    dst_stride_abgr = -dst_stride_abgr;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_abgr == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
-  }
-#if defined(HAS_I422TOABGRROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToABGRRow = I422ToABGRRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToABGRRow = I422ToABGRRow_NEON;
-    }
-  }
-#elif defined(HAS_I422TOABGRROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
-        I422ToABGRRow = I422ToABGRRow_SSSE3;
-      }
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
-    dst_abgr += dst_stride_abgr;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
-// Convert I422 to RGBA.
-LIBYUV_API
-int I422ToRGBA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height) {
-  int y;
-  void (*I422ToRGBARow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToRGBARow_C;
-  if (!src_y || !src_u || !src_v ||
-      !dst_rgba ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
-    dst_stride_rgba = -dst_stride_rgba;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_rgba == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
-  }
-#if defined(HAS_I422TORGBAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToRGBARow = I422ToRGBARow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToRGBARow = I422ToRGBARow_NEON;
-    }
-  }
-#elif defined(HAS_I422TORGBAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
-        I422ToRGBARow = I422ToRGBARow_SSSE3;
-      }
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
-    dst_rgba += dst_stride_rgba;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
-// Convert NV12 to RGB565.
-LIBYUV_API
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height) {
-  int y;
-  void (*NV12ToRGB565Row)(const uint8* y_buf,
-                          const uint8* uv_buf,
-                          uint8* rgb_buf,
-                          int width) = NV12ToRGB565Row_C;
-  if (!src_y || !src_uv || !dst_rgb565 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
-    dst_stride_rgb565 = -dst_stride_rgb565;
-  }
-#if defined(HAS_NV12TORGB565ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
-    }
-  }
-#elif defined(HAS_NV12TORGB565ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
-    dst_rgb565 += dst_stride_rgb565;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_uv += src_stride_uv;
-    }
-  }
-  return 0;
-}
-
-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_vu, int src_stride_vu,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height) {
-  int y;
-  void (*NV21ToRGB565Row)(const uint8* y_buf,
-                          const uint8* src_vu,
-                          uint8* rgb_buf,
-                          int width) = NV21ToRGB565Row_C;
-  if (!src_y || !src_vu || !dst_rgb565 ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
-    dst_stride_rgb565 = -dst_stride_rgb565;
-  }
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
-    }
-  }
-#elif defined(HAS_NV21TORGB565ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      NV21ToRGB565Row = NV21ToRGB565Row_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
-    dst_rgb565 += dst_stride_rgb565;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_vu += src_stride_vu;
-    }
-  }
-  return 0;
-}
-
-LIBYUV_API
-void SetPlane(uint8* dst_y, int dst_stride_y,
-              int width, int height,
-              uint32 value) {
-  int y;
-  uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
-  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
-  // Coalesce rows.
-  if (dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    dst_stride_y = 0;
-  }
-#if defined(HAS_SETROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) &&
-      IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-    SetRow = SetRow_NEON;
-  }
-#endif
-#if defined(HAS_SETROW_X86)
-  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
-    SetRow = SetRow_X86;
-  }
-#endif
-
-  // Set plane
-  for (y = 0; y < height; ++y) {
-    SetRow(dst_y, v32, width);
-    dst_y += dst_stride_y;
-  }
-}
-
-// Draw a rectangle into I420
-LIBYUV_API
-int I420Rect(uint8* dst_y, int dst_stride_y,
-             uint8* dst_u, int dst_stride_u,
-             uint8* dst_v, int dst_stride_v,
-             int x, int y,
-             int width, int height,
-             int value_y, int value_u, int value_v) {
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  uint8* start_y = dst_y + y * dst_stride_y + x;
-  uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
-  uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
-  if (!dst_y || !dst_u || !dst_v ||
-      width <= 0 || height <= 0 ||
-      x < 0 || y < 0 ||
-      value_y < 0 || value_y > 255 ||
-      value_u < 0 || value_u > 255 ||
-      value_v < 0 || value_v > 255) {
-    return -1;
-  }
-
-  SetPlane(start_y, dst_stride_y, width, height, value_y);
-  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
-  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
-  return 0;
-}
-
-// Draw a rectangle into ARGB
-LIBYUV_API
-int ARGBRect(uint8* dst_argb, int dst_stride_argb,
-             int dst_x, int dst_y,
-             int width, int height,
-             uint32 value) {
-  if (!dst_argb ||
-      width <= 0 || height <= 0 ||
-      dst_x < 0 || dst_y < 0) {
-    return -1;
-  }
-  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
-  // Coalesce rows.
-  if (dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    dst_stride_argb = 0;
-  }
-#if defined(HAS_SETROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
-    return 0;
-  }
-#endif
-#if defined(HAS_SETROW_X86)
-  if (TestCpuFlag(kCpuHasX86)) {
-    ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
-    return 0;
-  }
-#endif
-  ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
-  return 0;
-}
-
-// Convert unattentuated ARGB to preattenuated ARGB.
-// An unattenutated ARGB alpha blend uses the formula
-// p = a * f + (1 - a) * b
-// where
-//   p is output pixel
-//   f is foreground pixel
-//   b is background pixel
-//   a is alpha value from foreground pixel
-// An preattenutated ARGB alpha blend uses the formula
-// p = f + (1 - a) * b
-// where
-//   f is foreground pixel premultiplied by alpha
-
-LIBYUV_API
-int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int width, int height) {
-  int y;
-  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
-                           int width) = ARGBAttenuateRow_C;
-  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBATTENUATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
-    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
-    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBAttenuateRow(src_argb, dst_argb, width);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert preattentuated ARGB to unattenuated ARGB.
-LIBYUV_API
-int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height) {
-  int y;
-  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
-                             int width) = ARGBUnattenuateRow_C;
-  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
-    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
-    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
-    }
-  }
-#endif
-// TODO(fbarchard): Neon version.
-
-  for (y = 0; y < height; ++y) {
-    ARGBUnattenuateRow(src_argb, dst_argb, width);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Convert ARGB to Grayed ARGB.
-LIBYUV_API
-int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int y;
-  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
-                      int width) = ARGBGrayRow_C;
-  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBGRAYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBGrayRow = ARGBGrayRow_SSSE3;
-  }
-#elif defined(HAS_ARGBGRAYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    ARGBGrayRow = ARGBGrayRow_NEON;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBGrayRow(src_argb, dst_argb, width);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Make a rectangle of ARGB gray scale.
-LIBYUV_API
-int ARGBGray(uint8* dst_argb, int dst_stride_argb,
-             int dst_x, int dst_y,
-             int width, int height) {
-  int y;
-  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
-                      int width) = ARGBGrayRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
-    return -1;
-  }
-  // Coalesce rows.
-  if (dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBGRAYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBGrayRow = ARGBGrayRow_SSSE3;
-  }
-#elif defined(HAS_ARGBGRAYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    ARGBGrayRow = ARGBGrayRow_NEON;
-  }
-#endif
-  for (y = 0; y < height; ++y) {
-    ARGBGrayRow(dst, dst, width);
-    dst += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Make a rectangle of ARGB Sepia tone.
-LIBYUV_API
-int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
-              int dst_x, int dst_y, int width, int height) {
-  int y;
-  void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
-    return -1;
-  }
-  // Coalesce rows.
-  if (dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBSEPIAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
-  }
-#elif defined(HAS_ARGBSEPIAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    ARGBSepiaRow = ARGBSepiaRow_NEON;
-  }
-#endif
-  for (y = 0; y < height; ++y) {
-    ARGBSepiaRow(dst, width);
-    dst += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Apply a 4x4 matrix to each ARGB pixel.
-// Note: Normally for shading, but can be used to swizzle or invert.
-LIBYUV_API
-int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
-                    uint8* dst_argb, int dst_stride_argb,
-                    const int8* matrix_argb,
-                    int width, int height) {
-  int y;
-  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
-      const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
-  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
-  }
-#elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
-  }
-#endif
-  for (y = 0; y < height; ++y) {
-    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Apply a 4x3 matrix to each ARGB pixel.
-// Deprecated.
-LIBYUV_API
-int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
-                   const int8* matrix_rgb,
-                   int dst_x, int dst_y, int width, int height) {
-  SIMD_ALIGNED(int8 matrix_argb[16]);
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
-      dst_x < 0 || dst_y < 0) {
-    return -1;
-  }
-
-  // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
-  matrix_argb[0] = matrix_rgb[0] / 2;
-  matrix_argb[1] = matrix_rgb[1] / 2;
-  matrix_argb[2] = matrix_rgb[2] / 2;
-  matrix_argb[3] = matrix_rgb[3] / 2;
-  matrix_argb[4] = matrix_rgb[4] / 2;
-  matrix_argb[5] = matrix_rgb[5] / 2;
-  matrix_argb[6] = matrix_rgb[6] / 2;
-  matrix_argb[7] = matrix_rgb[7] / 2;
-  matrix_argb[8] = matrix_rgb[8] / 2;
-  matrix_argb[9] = matrix_rgb[9] / 2;
-  matrix_argb[10] = matrix_rgb[10] / 2;
-  matrix_argb[11] = matrix_rgb[11] / 2;
-  matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
-  matrix_argb[15] = 64;  // 1.0
-
-  return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
-                         dst, dst_stride_argb,
-                         &matrix_argb[0], width, height);
-}
-
-// Apply a color table each ARGB pixel.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
-                   const uint8* table_argb,
-                   int dst_x, int dst_y, int width, int height) {
-  int y;
-  void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
-                            int width) = ARGBColorTableRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
-      dst_x < 0 || dst_y < 0) {
-    return -1;
-  }
-  // Coalesce rows.
-  if (dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBCOLORTABLEROW_X86)
-  if (TestCpuFlag(kCpuHasX86)) {
-    ARGBColorTableRow = ARGBColorTableRow_X86;
-  }
-#endif
-  for (y = 0; y < height; ++y) {
-    ARGBColorTableRow(dst, table_argb, width);
-    dst += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Apply a color table each ARGB pixel but preserve destination alpha.
-// Table contains 256 ARGB values.
-LIBYUV_API
-int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
-                  const uint8* table_argb,
-                  int dst_x, int dst_y, int width, int height) {
-  int y;
-  void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
-                           int width) = RGBColorTableRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
-      dst_x < 0 || dst_y < 0) {
-    return -1;
-  }
-  // Coalesce rows.
-  if (dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    dst_stride_argb = 0;
-  }
-#if defined(HAS_RGBCOLORTABLEROW_X86)
-  if (TestCpuFlag(kCpuHasX86)) {
-    RGBColorTableRow = RGBColorTableRow_X86;
-  }
-#endif
-  for (y = 0; y < height; ++y) {
-    RGBColorTableRow(dst, table_argb, width);
-    dst += dst_stride_argb;
-  }
-  return 0;
-}
-
-// ARGBQuantize is used to posterize art.
-// e.g. rgb / qvalue * qvalue + qvalue / 2
-// But the low levels implement efficiently with 3 parameters, and could be
-// used for other high level operations.
-// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
-// where scale is 1 / interval_size as a fixed point value.
-// The divide is replaces with a multiply by reciprocal fixed point multiply.
-// Caveat - although SSE2 saturates, the C function does not and should be used
-// with care if doing anything but quantization.
-LIBYUV_API
-int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
-                 int scale, int interval_size, int interval_offset,
-                 int dst_x, int dst_y, int width, int height) {
-  int y;
-  void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
-                          int interval_offset, int width) = ARGBQuantizeRow_C;
-  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
-  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
-      interval_size < 1 || interval_size > 255) {
-    return -1;
-  }
-  // Coalesce rows.
-  if (dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBQUANTIZEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
-  }
-#elif defined(HAS_ARGBQUANTIZEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    ARGBQuantizeRow = ARGBQuantizeRow_NEON;
-  }
-#endif
-  for (y = 0; y < height; ++y) {
-    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
-    dst += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Computes table of cumulative sum for image where the value is the sum
-// of all values above and to the left of the entry. Used by ARGBBlur.
-LIBYUV_API
-int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
-                             int32* dst_cumsum, int dst_stride32_cumsum,
-                             int width, int height) {
-  int y;
-  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
-      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
-  int32* previous_cumsum = dst_cumsum;
-  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
-    return -1;
-  }
-#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
-  }
-#endif
-  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
-  for (y = 0; y < height; ++y) {
-    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
-    previous_cumsum = dst_cumsum;
-    dst_cumsum += dst_stride32_cumsum;
-    src_argb += src_stride_argb;
-  }
-  return 0;
-}
-
-// Blur ARGB image.
-// Caller should allocate CumulativeSum table of width * height * 16 bytes
-// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
-// as the buffer is treated as circular.
-LIBYUV_API
-int ARGBBlur(const uint8* src_argb, int src_stride_argb,
-             uint8* dst_argb, int dst_stride_argb,
-             int32* dst_cumsum, int dst_stride32_cumsum,
-             int width, int height, int radius) {
-  int y;
-  void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
-      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
-  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
-      int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
-  int32* cumsum_bot_row;
-  int32* max_cumsum_bot_row;
-  int32* cumsum_top_row;
-
-  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  if (radius > height) {
-    radius = height;
-  }
-  if (radius > (width / 2 - 1)) {
-    radius = width / 2 - 1;
-  }
-  if (radius <= 0) {
-    return -1;
-  }
-#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
-    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
-  }
-#endif
-  // Compute enough CumulativeSum for first row to be blurred. After this
-  // one row of CumulativeSum is updated at a time.
-  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
-                           dst_cumsum, dst_stride32_cumsum,
-                           width, radius);
-
-  src_argb = src_argb + radius * src_stride_argb;
-  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
-
-  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
-  cumsum_top_row = &dst_cumsum[0];
-
-  for (y = 0; y < height; ++y) {
-    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
-    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
-    int area = radius * (bot_y - top_y);
-    int boxwidth = radius * 4;
-    int x;
-    int n;
-
-    // Increment cumsum_top_row pointer with circular buffer wrap around.
-    if (top_y) {
-      cumsum_top_row += dst_stride32_cumsum;
-      if (cumsum_top_row >= max_cumsum_bot_row) {
-        cumsum_top_row = dst_cumsum;
-      }
-    }
-    // Increment cumsum_bot_row pointer with circular buffer wrap around and
-    // then fill in a row of CumulativeSum.
-    if ((y + radius) < height) {
-      const int32* prev_cumsum_bot_row = cumsum_bot_row;
-      cumsum_bot_row += dst_stride32_cumsum;
-      if (cumsum_bot_row >= max_cumsum_bot_row) {
-        cumsum_bot_row = dst_cumsum;
-      }
-      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
-                              width);
-      src_argb += src_stride_argb;
-    }
-
-    // Left clipped.
-    for (x = 0; x < radius + 1; ++x) {
-      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
-                                boxwidth, area, &dst_argb[x * 4], 1);
-      area += (bot_y - top_y);
-      boxwidth += 4;
-    }
-
-    // Middle unclipped.
-    n = (width - 1) - radius - x + 1;
-    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
-                              boxwidth, area, &dst_argb[x * 4], n);
-
-    // Right clipped.
-    for (x += n; x <= width - 1; ++x) {
-      area -= (bot_y - top_y);
-      boxwidth -= 4;
-      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
-                                cumsum_bot_row + (x - radius - 1) * 4,
-                                boxwidth, area, &dst_argb[x * 4], 1);
-    }
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Multiply ARGB image by a specified ARGB value.
-LIBYUV_API
-int ARGBShade(const uint8* src_argb, int src_stride_argb,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height, uint32 value) {
-  int y;
-  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
-                       int width, uint32 value) = ARGBShadeRow_C;
-  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
-    return -1;
-  }
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBSHADEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    ARGBShadeRow = ARGBShadeRow_SSE2;
-  }
-#elif defined(HAS_ARGBSHADEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    ARGBShadeRow = ARGBShadeRow_NEON;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBShadeRow(src_argb, dst_argb, width, value);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Interpolate 2 ARGB images by specified amount (0 to 255).
-LIBYUV_API
-int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
-                    const uint8* src_argb1, int src_stride_argb1,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height, int interpolation) {
-  int y;
-  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
-                         ptrdiff_t src_stride, int dst_width,
-                         int source_y_fraction) = InterpolateRow_C;
-  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb0 == width * 4 &&
-      src_stride_argb1 == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
-  }
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
-          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
-          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        InterpolateRow = InterpolateRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 4)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
-          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
-          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        InterpolateRow = InterpolateRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(width, 8)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(width, 4)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
-      IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
-      IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
-                   width * 4, interpolation);
-    src_argb0 += src_stride_argb0;
-    src_argb1 += src_stride_argb1;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
-LIBYUV_API
-int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
-                uint8* dst_argb, int dst_stride_argb,
-                const uint8* shuffler, int width, int height) {
-  int y;
-  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
-                         const uint8* shuffler, int pix) = ARGBShuffleRow_C;
-  if (!src_bgra || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
-    src_stride_bgra = -src_stride_bgra;
-  }
-  // Coalesce rows.
-  if (src_stride_bgra == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_bgra = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBSHUFFLEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
-    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBShuffleRow = ARGBShuffleRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
-    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
-          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        ARGBShuffleRow = ARGBShuffleRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
-    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBShuffleRow = ARGBShuffleRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBSHUFFLEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
-    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBShuffleRow = ARGBShuffleRow_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
-    src_bgra += src_stride_bgra;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Sobel ARGB effect.
-static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
-                        uint8* dst_argb, int dst_stride_argb,
-                        int width, int height,
-                        void (*SobelRow)(const uint8* src_sobelx,
-                                         const uint8* src_sobely,
-                                         uint8* dst, int width)) {
-  int y;
-  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
-                         uint32 selector, int pix) = ARGBToBayerGGRow_C;
-  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width) = SobelYRow_C;
-  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobely, int width) =
-      SobelXRow_C;
-  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
-  if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb  = src_argb  + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // ARGBToBayer used to select G channel from ARGB.
-#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-    ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerGGRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SOBELYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    SobelYRow = SobelYRow_SSE2;
-  }
-#endif
-#if defined(HAS_SOBELYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SobelYRow = SobelYRow_NEON;
-  }
-#endif
-#if defined(HAS_SOBELXROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    SobelXRow = SobelXRow_SSE2;
-  }
-#endif
-#if defined(HAS_SOBELXROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SobelXRow = SobelXRow_NEON;
-  }
-#endif
-  {
-    // 3 rows with edges before/after.
-    const int kRowSize = (width + kEdge + 15) & ~15;
-    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
-    uint8* row_sobelx = rows;
-    uint8* row_sobely = rows + kRowSize;
-    uint8* row_y = rows + kRowSize * 2;
-
-    // Convert first row.
-    uint8* row_y0 = row_y + kEdge;
-    uint8* row_y1 = row_y0 + kRowSize;
-    uint8* row_y2 = row_y1 + kRowSize;
-    ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
-    row_y0[-1] = row_y0[0];
-    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
-    ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
-    row_y1[-1] = row_y1[0];
-    memset(row_y1 + width, row_y1[width - 1], 16);
-    memset(row_y2 + width, 0, 16);
-
-    for (y = 0; y < height; ++y) {
-      // Convert next row of ARGB to Y.
-      if (y < (height - 1)) {
-        src_argb += src_stride_argb;
-      }
-      ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
-      row_y2[-1] = row_y2[0];
-      row_y2[width] = row_y2[width - 1];
-
-      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
-      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
-      SobelRow(row_sobelx, row_sobely, dst_argb, width);
-
-      // Cycle thru circular queue of 3 row_y buffers.
-      {
-        uint8* row_yt = row_y0;
-        row_y0 = row_y1;
-        row_y1 = row_y2;
-        row_y2 = row_yt;
-      }
-
-      dst_argb += dst_stride_argb;
-    }
-    free_aligned_buffer_64(rows);
-  }
-  return 0;
-}
-
-// Sobel ARGB effect.
-LIBYUV_API
-int ARGBSobel(const uint8* src_argb, int src_stride_argb,
-              uint8* dst_argb, int dst_stride_argb,
-              int width, int height) {
-  void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
-                   uint8* dst_argb, int width) = SobelRow_C;
-#if defined(HAS_SOBELROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    SobelRow = SobelRow_SSE2;
-  }
-#endif
-#if defined(HAS_SOBELROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    SobelRow = SobelRow_NEON;
-  }
-#endif
-  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
-                      width, height, SobelRow);
-}
-
-// Sobel ARGB effect with planar output.
-LIBYUV_API
-int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
-                     uint8* dst_y, int dst_stride_y,
-                     int width, int height) {
-  void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
-                          uint8* dst_, int width) = SobelToPlaneRow_C;
-#if defined(HAS_SOBELTOPLANEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-    SobelToPlaneRow = SobelToPlaneRow_SSE2;
-  }
-#endif
-#if defined(HAS_SOBELTOPLANEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    SobelToPlaneRow = SobelToPlaneRow_NEON;
-  }
-#endif
-  return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
-                      width, height, SobelToPlaneRow);
-}
-
-// SobelXY ARGB effect.
-// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
-LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
-                uint8* dst_argb, int dst_stride_argb,
-                int width, int height) {
-  void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
-                     uint8* dst_argb, int width) = SobelXYRow_C;
-#if defined(HAS_SOBELXYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    SobelXYRow = SobelXYRow_SSE2;
-  }
-#endif
-#if defined(HAS_SOBELXYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    SobelXYRow = SobelXYRow_NEON;
-  }
-#endif
-  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
-                      width, height, SobelXYRow);
-}
-
-// Apply a 4x4 polynomial to each ARGB pixel.
-LIBYUV_API
-int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
-                   uint8* dst_argb, int dst_stride_argb,
-                   const float* poly,
-                   int width, int height) {
-  int y;
-  void (*ARGBPolynomialRow)(const uint8* src_argb,
-                            uint8* dst_argb, const float* poly,
-                            int width) = ARGBPolynomialRow_C;
-  if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb  = src_argb  + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
-    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
-  }
-#endif
-#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
-      IS_ALIGNED(width, 2)) {
-    ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Apply a lumacolortable to each ARGB pixel.
-LIBYUV_API
-int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
-                       uint8* dst_argb, int dst_stride_argb,
-                       const uint8* luma,
-                       int width, int height) {
-  int y;
-  void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
-      int width, const uint8* luma, const uint32 lumacoeff) =
-      ARGBLumaColorTableRow_C;
-  if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb  = src_argb  + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
-    ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Copy Alpha from one ARGB image to another.
-LIBYUV_API
-int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int width, int height) {
-  int y;
-  void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
-      ARGBCopyAlphaRow_C;
-  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_argb == width * 4 &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_argb = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
-      IS_ALIGNED(width, 8)) {
-    ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
-  }
-#endif
-#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
-    ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBCopyAlphaRow(src_argb, dst_argb, width);
-    src_argb += src_stride_argb;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-// Copy a planar Y channel to the alpha channel of a destination ARGB image.
-LIBYUV_API
-int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
-                     uint8* dst_argb, int dst_stride_argb,
-                     int width, int height) {
-  int y;
-  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
-      ARGBCopyYToAlphaRow_C;
-  if (!src_y || !dst_argb || width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_stride_y = -src_stride_y;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_argb = 0;
-  }
-#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
-      IS_ALIGNED(width, 8)) {
-    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
-  }
-#endif
-#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
-    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    ARGBCopyYToAlphaRow(src_y, dst_argb, width);
-    src_y += src_stride_y;
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc
deleted file mode 100755
index b052ac1dc4..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc
+++ /dev/null
@@ -1,1301 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/convert.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#if defined(__APPLE__) && defined(__i386__)
-#define DECLARE_FUNCTION(name)                                                 \
-    ".text                                     \n"                             \
-    ".private_extern _" #name "                \n"                             \
-    ".align 4,0x90                             \n"                             \
-"_" #name ":                                   \n"
-#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
-#define DECLARE_FUNCTION(name)                                                 \
-    ".text                                     \n"                             \
-    ".align 4,0x90                             \n"                             \
-"_" #name ":                                   \n"
-#else
-#define DECLARE_FUNCTION(name)                                                 \
-    ".text                                     \n"                             \
-    ".align 4,0x90                             \n"                             \
-#name ":                                       \n"
-#endif
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_MIRRORROW_UV_NEON
-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
-#define HAS_TRANSPOSE_WX8_NEON
-void TransposeWx8_NEON(const uint8* src, int src_stride,
-                       uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWX8_NEON
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
-                         uint8* dst_a, int dst_stride_a,
-                         uint8* dst_b, int dst_stride_b,
-                         int width);
-#endif  // defined(__ARM_NEON__)
-
-#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
-    defined(__mips__) && \
-    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
-                             uint8* dst, int dst_stride, int width);
-
-void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
-                                  uint8* dst, int dst_stride, int width);
-#define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
-                               uint8* dst_a, int dst_stride_a,
-                               uint8* dst_b, int dst_stride_b,
-                               int width);
-#endif  // defined(__mips__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
-    defined(_M_IX86) && defined(_MSC_VER)
-#define HAS_TRANSPOSE_WX8_SSSE3
-__declspec(naked) __declspec(align(16))
-static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
-                               uint8* dst, int dst_stride, int width) {
-  __asm {
-    push      edi
-    push      esi
-    push      ebp
-    mov       eax, [esp + 12 + 4]   // src
-    mov       edi, [esp + 12 + 8]   // src_stride
-    mov       edx, [esp + 12 + 12]  // dst
-    mov       esi, [esp + 12 + 16]  // dst_stride
-    mov       ecx, [esp + 12 + 20]  // width
-
-    // Read in the data from the source pointer.
-    // First round of bit swap.
-    align      4
- convertloop:
-    movq      xmm0, qword ptr [eax]
-    lea       ebp, [eax + 8]
-    movq      xmm1, qword ptr [eax + edi]
-    lea       eax, [eax + 2 * edi]
-    punpcklbw xmm0, xmm1
-    movq      xmm2, qword ptr [eax]
-    movdqa    xmm1, xmm0
-    palignr   xmm1, xmm1, 8
-    movq      xmm3, qword ptr [eax + edi]
-    lea       eax, [eax + 2 * edi]
-    punpcklbw xmm2, xmm3
-    movdqa    xmm3, xmm2
-    movq      xmm4, qword ptr [eax]
-    palignr   xmm3, xmm3, 8
-    movq      xmm5, qword ptr [eax + edi]
-    punpcklbw xmm4, xmm5
-    lea       eax, [eax + 2 * edi]
-    movdqa    xmm5, xmm4
-    movq      xmm6, qword ptr [eax]
-    palignr   xmm5, xmm5, 8
-    movq      xmm7, qword ptr [eax + edi]
-    punpcklbw xmm6, xmm7
-    mov       eax, ebp
-    movdqa    xmm7, xmm6
-    palignr   xmm7, xmm7, 8
-    // Second round of bit swap.
-    punpcklwd xmm0, xmm2
-    punpcklwd xmm1, xmm3
-    movdqa    xmm2, xmm0
-    movdqa    xmm3, xmm1
-    palignr   xmm2, xmm2, 8
-    palignr   xmm3, xmm3, 8
-    punpcklwd xmm4, xmm6
-    punpcklwd xmm5, xmm7
-    movdqa    xmm6, xmm4
-    movdqa    xmm7, xmm5
-    palignr   xmm6, xmm6, 8
-    palignr   xmm7, xmm7, 8
-    // Third round of bit swap.
-    // Write to the destination pointer.
-    punpckldq xmm0, xmm4
-    movq      qword ptr [edx], xmm0
-    movdqa    xmm4, xmm0
-    palignr   xmm4, xmm4, 8
-    movq      qword ptr [edx + esi], xmm4
-    lea       edx, [edx + 2 * esi]
-    punpckldq xmm2, xmm6
-    movdqa    xmm6, xmm2
-    palignr   xmm6, xmm6, 8
-    movq      qword ptr [edx], xmm2
-    punpckldq xmm1, xmm5
-    movq      qword ptr [edx + esi], xmm6
-    lea       edx, [edx + 2 * esi]
-    movdqa    xmm5, xmm1
-    movq      qword ptr [edx], xmm1
-    palignr   xmm5, xmm5, 8
-    punpckldq xmm3, xmm7
-    movq      qword ptr [edx + esi], xmm5
-    lea       edx, [edx + 2 * esi]
-    movq      qword ptr [edx], xmm3
-    movdqa    xmm7, xmm3
-    palignr   xmm7, xmm7, 8
-    sub       ecx, 8
-    movq      qword ptr [edx + esi], xmm7
-    lea       edx, [edx + 2 * esi]
-    jg        convertloop
-
-    pop       ebp
-    pop       esi
-    pop       edi
-    ret
-  }
-}
-
-#define HAS_TRANSPOSE_UVWX8_SSE2
-__declspec(naked) __declspec(align(16))
-static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
-                                uint8* dst_a, int dst_stride_a,
-                                uint8* dst_b, int dst_stride_b,
-                                int w) {
-  __asm {
-    push      ebx
-    push      esi
-    push      edi
-    push      ebp
-    mov       eax, [esp + 16 + 4]   // src
-    mov       edi, [esp + 16 + 8]   // src_stride
-    mov       edx, [esp + 16 + 12]  // dst_a
-    mov       esi, [esp + 16 + 16]  // dst_stride_a
-    mov       ebx, [esp + 16 + 20]  // dst_b
-    mov       ebp, [esp + 16 + 24]  // dst_stride_b
-    mov       ecx, esp
-    sub       esp, 4 + 16
-    and       esp, ~15
-    mov       [esp + 16], ecx
-    mov       ecx, [ecx + 16 + 28]  // w
-
-    align      4
- convertloop:
-    // Read in the data from the source pointer.
-    // First round of bit swap.
-    movdqa    xmm0, [eax]
-    movdqa    xmm1, [eax + edi]
-    lea       eax, [eax + 2 * edi]
-    movdqa    xmm7, xmm0  // use xmm7 as temp register.
-    punpcklbw xmm0, xmm1
-    punpckhbw xmm7, xmm1
-    movdqa    xmm1, xmm7
-    movdqa    xmm2, [eax]
-    movdqa    xmm3, [eax + edi]
-    lea       eax, [eax + 2 * edi]
-    movdqa    xmm7, xmm2
-    punpcklbw xmm2, xmm3
-    punpckhbw xmm7, xmm3
-    movdqa    xmm3, xmm7
-    movdqa    xmm4, [eax]
-    movdqa    xmm5, [eax + edi]
-    lea       eax, [eax + 2 * edi]
-    movdqa    xmm7, xmm4
-    punpcklbw xmm4, xmm5
-    punpckhbw xmm7, xmm5
-    movdqa    xmm5, xmm7
-    movdqa    xmm6, [eax]
-    movdqa    xmm7, [eax + edi]
-    lea       eax, [eax + 2 * edi]
-    movdqa    [esp], xmm5  // backup xmm5
-    neg       edi
-    movdqa    xmm5, xmm6   // use xmm5 as temp register.
-    punpcklbw xmm6, xmm7
-    punpckhbw xmm5, xmm7
-    movdqa    xmm7, xmm5
-    lea       eax, [eax + 8 * edi + 16]
-    neg       edi
-    // Second round of bit swap.
-    movdqa    xmm5, xmm0
-    punpcklwd xmm0, xmm2
-    punpckhwd xmm5, xmm2
-    movdqa    xmm2, xmm5
-    movdqa    xmm5, xmm1
-    punpcklwd xmm1, xmm3
-    punpckhwd xmm5, xmm3
-    movdqa    xmm3, xmm5
-    movdqa    xmm5, xmm4
-    punpcklwd xmm4, xmm6
-    punpckhwd xmm5, xmm6
-    movdqa    xmm6, xmm5
-    movdqa    xmm5, [esp]  // restore xmm5
-    movdqa    [esp], xmm6  // backup xmm6
-    movdqa    xmm6, xmm5    // use xmm6 as temp register.
-    punpcklwd xmm5, xmm7
-    punpckhwd xmm6, xmm7
-    movdqa    xmm7, xmm6
-    // Third round of bit swap.
-    // Write to the destination pointer.
-    movdqa    xmm6, xmm0
-    punpckldq xmm0, xmm4
-    punpckhdq xmm6, xmm4
-    movdqa    xmm4, xmm6
-    movdqa    xmm6, [esp]  // restore xmm6
-    movlpd    qword ptr [edx], xmm0
-    movhpd    qword ptr [ebx], xmm0
-    movlpd    qword ptr [edx + esi], xmm4
-    lea       edx, [edx + 2 * esi]
-    movhpd    qword ptr [ebx + ebp], xmm4
-    lea       ebx, [ebx + 2 * ebp]
-    movdqa    xmm0, xmm2   // use xmm0 as the temp register.
-    punpckldq xmm2, xmm6
-    movlpd    qword ptr [edx], xmm2
-    movhpd    qword ptr [ebx], xmm2
-    punpckhdq xmm0, xmm6
-    movlpd    qword ptr [edx + esi], xmm0
-    lea       edx, [edx + 2 * esi]
-    movhpd    qword ptr [ebx + ebp], xmm0
-    lea       ebx, [ebx + 2 * ebp]
-    movdqa    xmm0, xmm1   // use xmm0 as the temp register.
-    punpckldq xmm1, xmm5
-    movlpd    qword ptr [edx], xmm1
-    movhpd    qword ptr [ebx], xmm1
-    punpckhdq xmm0, xmm5
-    movlpd    qword ptr [edx + esi], xmm0
-    lea       edx, [edx + 2 * esi]
-    movhpd    qword ptr [ebx + ebp], xmm0
-    lea       ebx, [ebx + 2 * ebp]
-    movdqa    xmm0, xmm3   // use xmm0 as the temp register.
-    punpckldq xmm3, xmm7
-    movlpd    qword ptr [edx], xmm3
-    movhpd    qword ptr [ebx], xmm3
-    punpckhdq xmm0, xmm7
-    sub       ecx, 8
-    movlpd    qword ptr [edx + esi], xmm0
-    lea       edx, [edx + 2 * esi]
-    movhpd    qword ptr [ebx + ebp], xmm0
-    lea       ebx, [ebx + 2 * ebp]
-    jg        convertloop
-
-    mov       esp, [esp + 16]
-    pop       ebp
-    pop       edi
-    pop       esi
-    pop       ebx
-    ret
-  }
-}
-#elif !defined(LIBYUV_DISABLE_X86) && \
-    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
-#define HAS_TRANSPOSE_WX8_SSSE3
-static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
-                               uint8* dst, int dst_stride, int width) {
-  asm volatile (
-    // Read in the data from the source pointer.
-    // First round of bit swap.
-    ".p2align  2                                 \n"
-  "1:                                            \n"
-    "movq       (%0),%%xmm0                      \n"
-    "movq       (%0,%3),%%xmm1                   \n"
-    "lea        (%0,%3,2),%0                     \n"
-    "punpcklbw  %%xmm1,%%xmm0                    \n"
-    "movq       (%0),%%xmm2                      \n"
-    "movdqa     %%xmm0,%%xmm1                    \n"
-    "palignr    $0x8,%%xmm1,%%xmm1               \n"
-    "movq       (%0,%3),%%xmm3                   \n"
-    "lea        (%0,%3,2),%0                     \n"
-    "punpcklbw  %%xmm3,%%xmm2                    \n"
-    "movdqa     %%xmm2,%%xmm3                    \n"
-    "movq       (%0),%%xmm4                      \n"
-    "palignr    $0x8,%%xmm3,%%xmm3               \n"
-    "movq       (%0,%3),%%xmm5                   \n"
-    "lea        (%0,%3,2),%0                     \n"
-    "punpcklbw  %%xmm5,%%xmm4                    \n"
-    "movdqa     %%xmm4,%%xmm5                    \n"
-    "movq       (%0),%%xmm6                      \n"
-    "palignr    $0x8,%%xmm5,%%xmm5               \n"
-    "movq       (%0,%3),%%xmm7                   \n"
-    "lea        (%0,%3,2),%0                     \n"
-    "punpcklbw  %%xmm7,%%xmm6                    \n"
-    "neg        %3                               \n"
-    "movdqa     %%xmm6,%%xmm7                    \n"
-    "lea        0x8(%0,%3,8),%0                  \n"
-    "palignr    $0x8,%%xmm7,%%xmm7               \n"
-    "neg        %3                               \n"
-     // Second round of bit swap.
-    "punpcklwd  %%xmm2,%%xmm0                    \n"
-    "punpcklwd  %%xmm3,%%xmm1                    \n"
-    "movdqa     %%xmm0,%%xmm2                    \n"
-    "movdqa     %%xmm1,%%xmm3                    \n"
-    "palignr    $0x8,%%xmm2,%%xmm2               \n"
-    "palignr    $0x8,%%xmm3,%%xmm3               \n"
-    "punpcklwd  %%xmm6,%%xmm4                    \n"
-    "punpcklwd  %%xmm7,%%xmm5                    \n"
-    "movdqa     %%xmm4,%%xmm6                    \n"
-    "movdqa     %%xmm5,%%xmm7                    \n"
-    "palignr    $0x8,%%xmm6,%%xmm6               \n"
-    "palignr    $0x8,%%xmm7,%%xmm7               \n"
-    // Third round of bit swap.
-    // Write to the destination pointer.
-    "punpckldq  %%xmm4,%%xmm0                    \n"
-    "movq       %%xmm0,(%1)                      \n"
-    "movdqa     %%xmm0,%%xmm4                    \n"
-    "palignr    $0x8,%%xmm4,%%xmm4               \n"
-    "movq       %%xmm4,(%1,%4)                   \n"
-    "lea        (%1,%4,2),%1                     \n"
-    "punpckldq  %%xmm6,%%xmm2                    \n"
-    "movdqa     %%xmm2,%%xmm6                    \n"
-    "movq       %%xmm2,(%1)                      \n"
-    "palignr    $0x8,%%xmm6,%%xmm6               \n"
-    "punpckldq  %%xmm5,%%xmm1                    \n"
-    "movq       %%xmm6,(%1,%4)                   \n"
-    "lea        (%1,%4,2),%1                     \n"
-    "movdqa     %%xmm1,%%xmm5                    \n"
-    "movq       %%xmm1,(%1)                      \n"
-    "palignr    $0x8,%%xmm5,%%xmm5               \n"
-    "movq       %%xmm5,(%1,%4)                   \n"
-    "lea        (%1,%4,2),%1                     \n"
-    "punpckldq  %%xmm7,%%xmm3                    \n"
-    "movq       %%xmm3,(%1)                      \n"
-    "movdqa     %%xmm3,%%xmm7                    \n"
-    "palignr    $0x8,%%xmm7,%%xmm7               \n"
-    "sub        $0x8,%2                          \n"
-    "movq       %%xmm7,(%1,%4)                   \n"
-    "lea        (%1,%4,2),%1                     \n"
-    "jg         1b                               \n"
-    : "+r"(src),    // %0
-      "+r"(dst),    // %1
-      "+r"(width)   // %2
-    : "r"((intptr_t)(src_stride)),  // %3
-      "r"((intptr_t)(dst_stride))   // %4
-    : "memory", "cc"
-  #if defined(__SSE2__)
-      , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-  #endif
-  );
-}
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)
-#define HAS_TRANSPOSE_UVWX8_SSE2
-extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
-                                    uint8* dst_a, int dst_stride_a,
-                                    uint8* dst_b, int dst_stride_b,
-                                    int w);
-  asm (
-    DECLARE_FUNCTION(TransposeUVWx8_SSE2)
-    "push   %ebx                               \n"
-    "push   %esi                               \n"
-    "push   %edi                               \n"
-    "push   %ebp                               \n"
-    "mov    0x14(%esp),%eax                    \n"
-    "mov    0x18(%esp),%edi                    \n"
-    "mov    0x1c(%esp),%edx                    \n"
-    "mov    0x20(%esp),%esi                    \n"
-    "mov    0x24(%esp),%ebx                    \n"
-    "mov    0x28(%esp),%ebp                    \n"
-    "mov    %esp,%ecx                          \n"
-    "sub    $0x14,%esp                         \n"
-    "and    $0xfffffff0,%esp                   \n"
-    "mov    %ecx,0x10(%esp)                    \n"
-    "mov    0x2c(%ecx),%ecx                    \n"
-
-"1:                                            \n"
-    "movdqa (%eax),%xmm0                       \n"
-    "movdqa (%eax,%edi,1),%xmm1                \n"
-    "lea    (%eax,%edi,2),%eax                 \n"
-    "movdqa %xmm0,%xmm7                        \n"
-    "punpcklbw %xmm1,%xmm0                     \n"
-    "punpckhbw %xmm1,%xmm7                     \n"
-    "movdqa %xmm7,%xmm1                        \n"
-    "movdqa (%eax),%xmm2                       \n"
-    "movdqa (%eax,%edi,1),%xmm3                \n"
-    "lea    (%eax,%edi,2),%eax                 \n"
-    "movdqa %xmm2,%xmm7                        \n"
-    "punpcklbw %xmm3,%xmm2                     \n"
-    "punpckhbw %xmm3,%xmm7                     \n"
-    "movdqa %xmm7,%xmm3                        \n"
-    "movdqa (%eax),%xmm4                       \n"
-    "movdqa (%eax,%edi,1),%xmm5                \n"
-    "lea    (%eax,%edi,2),%eax                 \n"
-    "movdqa %xmm4,%xmm7                        \n"
-    "punpcklbw %xmm5,%xmm4                     \n"
-    "punpckhbw %xmm5,%xmm7                     \n"
-    "movdqa %xmm7,%xmm5                        \n"
-    "movdqa (%eax),%xmm6                       \n"
-    "movdqa (%eax,%edi,1),%xmm7                \n"
-    "lea    (%eax,%edi,2),%eax                 \n"
-    "movdqa %xmm5,(%esp)                       \n"
-    "neg    %edi                               \n"
-    "movdqa %xmm6,%xmm5                        \n"
-    "punpcklbw %xmm7,%xmm6                     \n"
-    "punpckhbw %xmm7,%xmm5                     \n"
-    "movdqa %xmm5,%xmm7                        \n"
-    "lea    0x10(%eax,%edi,8),%eax             \n"
-    "neg    %edi                               \n"
-    "movdqa %xmm0,%xmm5                        \n"
-    "punpcklwd %xmm2,%xmm0                     \n"
-    "punpckhwd %xmm2,%xmm5                     \n"
-    "movdqa %xmm5,%xmm2                        \n"
-    "movdqa %xmm1,%xmm5                        \n"
-    "punpcklwd %xmm3,%xmm1                     \n"
-    "punpckhwd %xmm3,%xmm5                     \n"
-    "movdqa %xmm5,%xmm3                        \n"
-    "movdqa %xmm4,%xmm5                        \n"
-    "punpcklwd %xmm6,%xmm4                     \n"
-    "punpckhwd %xmm6,%xmm5                     \n"
-    "movdqa %xmm5,%xmm6                        \n"
-    "movdqa (%esp),%xmm5                       \n"
-    "movdqa %xmm6,(%esp)                       \n"
-    "movdqa %xmm5,%xmm6                        \n"
-    "punpcklwd %xmm7,%xmm5                     \n"
-    "punpckhwd %xmm7,%xmm6                     \n"
-    "movdqa %xmm6,%xmm7                        \n"
-    "movdqa %xmm0,%xmm6                        \n"
-    "punpckldq %xmm4,%xmm0                     \n"
-    "punpckhdq %xmm4,%xmm6                     \n"
-    "movdqa %xmm6,%xmm4                        \n"
-    "movdqa (%esp),%xmm6                       \n"
-    "movlpd %xmm0,(%edx)                       \n"
-    "movhpd %xmm0,(%ebx)                       \n"
-    "movlpd %xmm4,(%edx,%esi,1)                \n"
-    "lea    (%edx,%esi,2),%edx                 \n"
-    "movhpd %xmm4,(%ebx,%ebp,1)                \n"
-    "lea    (%ebx,%ebp,2),%ebx                 \n"
-    "movdqa %xmm2,%xmm0                        \n"
-    "punpckldq %xmm6,%xmm2                     \n"
-    "movlpd %xmm2,(%edx)                       \n"
-    "movhpd %xmm2,(%ebx)                       \n"
-    "punpckhdq %xmm6,%xmm0                     \n"
-    "movlpd %xmm0,(%edx,%esi,1)                \n"
-    "lea    (%edx,%esi,2),%edx                 \n"
-    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
-    "lea    (%ebx,%ebp,2),%ebx                 \n"
-    "movdqa %xmm1,%xmm0                        \n"
-    "punpckldq %xmm5,%xmm1                     \n"
-    "movlpd %xmm1,(%edx)                       \n"
-    "movhpd %xmm1,(%ebx)                       \n"
-    "punpckhdq %xmm5,%xmm0                     \n"
-    "movlpd %xmm0,(%edx,%esi,1)                \n"
-    "lea    (%edx,%esi,2),%edx                 \n"
-    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
-    "lea    (%ebx,%ebp,2),%ebx                 \n"
-    "movdqa %xmm3,%xmm0                        \n"
-    "punpckldq %xmm7,%xmm3                     \n"
-    "movlpd %xmm3,(%edx)                       \n"
-    "movhpd %xmm3,(%ebx)                       \n"
-    "punpckhdq %xmm7,%xmm0                     \n"
-    "sub    $0x8,%ecx                          \n"
-    "movlpd %xmm0,(%edx,%esi,1)                \n"
-    "lea    (%edx,%esi,2),%edx                 \n"
-    "movhpd %xmm0,(%ebx,%ebp,1)                \n"
-    "lea    (%ebx,%ebp,2),%ebx                 \n"
-    "jg     1b                                 \n"
-    "mov    0x10(%esp),%esp                    \n"
-    "pop    %ebp                               \n"
-    "pop    %edi                               \n"
-    "pop    %esi                               \n"
-    "pop    %ebx                               \n"
-#if defined(__native_client__)
-    "pop    %ecx                               \n"
-    "and    $0xffffffe0,%ecx                   \n"
-    "jmp    *%ecx                              \n"
-#else
-    "ret                                       \n"
-#endif
-);
-#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
-    defined(__x86_64__)
-// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
-#define HAS_TRANSPOSE_WX8_FAST_SSSE3
-static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
-                                    uint8* dst, int dst_stride, int width) {
-  asm volatile (
-  // Read in the data from the source pointer.
-  // First round of bit swap.
-  ".p2align  2                                 \n"
-"1:                                            \n"
-  "movdqa     (%0),%%xmm0                      \n"
-  "movdqa     (%0,%3),%%xmm1                   \n"
-  "lea        (%0,%3,2),%0                     \n"
-  "movdqa     %%xmm0,%%xmm8                    \n"
-  "punpcklbw  %%xmm1,%%xmm0                    \n"
-  "punpckhbw  %%xmm1,%%xmm8                    \n"
-  "movdqa     (%0),%%xmm2                      \n"
-  "movdqa     %%xmm0,%%xmm1                    \n"
-  "movdqa     %%xmm8,%%xmm9                    \n"
-  "palignr    $0x8,%%xmm1,%%xmm1               \n"
-  "palignr    $0x8,%%xmm9,%%xmm9               \n"
-  "movdqa     (%0,%3),%%xmm3                   \n"
-  "lea        (%0,%3,2),%0                     \n"
-  "movdqa     %%xmm2,%%xmm10                   \n"
-  "punpcklbw  %%xmm3,%%xmm2                    \n"
-  "punpckhbw  %%xmm3,%%xmm10                   \n"
-  "movdqa     %%xmm2,%%xmm3                    \n"
-  "movdqa     %%xmm10,%%xmm11                  \n"
-  "movdqa     (%0),%%xmm4                      \n"
-  "palignr    $0x8,%%xmm3,%%xmm3               \n"
-  "palignr    $0x8,%%xmm11,%%xmm11             \n"
-  "movdqa     (%0,%3),%%xmm5                   \n"
-  "lea        (%0,%3,2),%0                     \n"
-  "movdqa     %%xmm4,%%xmm12                   \n"
-  "punpcklbw  %%xmm5,%%xmm4                    \n"
-  "punpckhbw  %%xmm5,%%xmm12                   \n"
-  "movdqa     %%xmm4,%%xmm5                    \n"
-  "movdqa     %%xmm12,%%xmm13                  \n"
-  "movdqa     (%0),%%xmm6                      \n"
-  "palignr    $0x8,%%xmm5,%%xmm5               \n"
-  "palignr    $0x8,%%xmm13,%%xmm13             \n"
-  "movdqa     (%0,%3),%%xmm7                   \n"
-  "lea        (%0,%3,2),%0                     \n"
-  "movdqa     %%xmm6,%%xmm14                   \n"
-  "punpcklbw  %%xmm7,%%xmm6                    \n"
-  "punpckhbw  %%xmm7,%%xmm14                   \n"
-  "neg        %3                               \n"
-  "movdqa     %%xmm6,%%xmm7                    \n"
-  "movdqa     %%xmm14,%%xmm15                  \n"
-  "lea        0x10(%0,%3,8),%0                 \n"
-  "palignr    $0x8,%%xmm7,%%xmm7               \n"
-  "palignr    $0x8,%%xmm15,%%xmm15             \n"
-  "neg        %3                               \n"
-   // Second round of bit swap.
-  "punpcklwd  %%xmm2,%%xmm0                    \n"
-  "punpcklwd  %%xmm3,%%xmm1                    \n"
-  "movdqa     %%xmm0,%%xmm2                    \n"
-  "movdqa     %%xmm1,%%xmm3                    \n"
-  "palignr    $0x8,%%xmm2,%%xmm2               \n"
-  "palignr    $0x8,%%xmm3,%%xmm3               \n"
-  "punpcklwd  %%xmm6,%%xmm4                    \n"
-  "punpcklwd  %%xmm7,%%xmm5                    \n"
-  "movdqa     %%xmm4,%%xmm6                    \n"
-  "movdqa     %%xmm5,%%xmm7                    \n"
-  "palignr    $0x8,%%xmm6,%%xmm6               \n"
-  "palignr    $0x8,%%xmm7,%%xmm7               \n"
-  "punpcklwd  %%xmm10,%%xmm8                   \n"
-  "punpcklwd  %%xmm11,%%xmm9                   \n"
-  "movdqa     %%xmm8,%%xmm10                   \n"
-  "movdqa     %%xmm9,%%xmm11                   \n"
-  "palignr    $0x8,%%xmm10,%%xmm10             \n"
-  "palignr    $0x8,%%xmm11,%%xmm11             \n"
-  "punpcklwd  %%xmm14,%%xmm12                  \n"
-  "punpcklwd  %%xmm15,%%xmm13                  \n"
-  "movdqa     %%xmm12,%%xmm14                  \n"
-  "movdqa     %%xmm13,%%xmm15                  \n"
-  "palignr    $0x8,%%xmm14,%%xmm14             \n"
-  "palignr    $0x8,%%xmm15,%%xmm15             \n"
-  // Third round of bit swap.
-  // Write to the destination pointer.
-  "punpckldq  %%xmm4,%%xmm0                    \n"
-  "movq       %%xmm0,(%1)                      \n"
-  "movdqa     %%xmm0,%%xmm4                    \n"
-  "palignr    $0x8,%%xmm4,%%xmm4               \n"
-  "movq       %%xmm4,(%1,%4)                   \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "punpckldq  %%xmm6,%%xmm2                    \n"
-  "movdqa     %%xmm2,%%xmm6                    \n"
-  "movq       %%xmm2,(%1)                      \n"
-  "palignr    $0x8,%%xmm6,%%xmm6               \n"
-  "punpckldq  %%xmm5,%%xmm1                    \n"
-  "movq       %%xmm6,(%1,%4)                   \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "movdqa     %%xmm1,%%xmm5                    \n"
-  "movq       %%xmm1,(%1)                      \n"
-  "palignr    $0x8,%%xmm5,%%xmm5               \n"
-  "movq       %%xmm5,(%1,%4)                   \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "punpckldq  %%xmm7,%%xmm3                    \n"
-  "movq       %%xmm3,(%1)                      \n"
-  "movdqa     %%xmm3,%%xmm7                    \n"
-  "palignr    $0x8,%%xmm7,%%xmm7               \n"
-  "movq       %%xmm7,(%1,%4)                   \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "punpckldq  %%xmm12,%%xmm8                   \n"
-  "movq       %%xmm8,(%1)                      \n"
-  "movdqa     %%xmm8,%%xmm12                   \n"
-  "palignr    $0x8,%%xmm12,%%xmm12             \n"
-  "movq       %%xmm12,(%1,%4)                  \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "punpckldq  %%xmm14,%%xmm10                  \n"
-  "movdqa     %%xmm10,%%xmm14                  \n"
-  "movq       %%xmm10,(%1)                     \n"
-  "palignr    $0x8,%%xmm14,%%xmm14             \n"
-  "punpckldq  %%xmm13,%%xmm9                   \n"
-  "movq       %%xmm14,(%1,%4)                  \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "movdqa     %%xmm9,%%xmm13                   \n"
-  "movq       %%xmm9,(%1)                      \n"
-  "palignr    $0x8,%%xmm13,%%xmm13             \n"
-  "movq       %%xmm13,(%1,%4)                  \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "punpckldq  %%xmm15,%%xmm11                  \n"
-  "movq       %%xmm11,(%1)                     \n"
-  "movdqa     %%xmm11,%%xmm15                  \n"
-  "palignr    $0x8,%%xmm15,%%xmm15             \n"
-  "sub        $0x10,%2                         \n"
-  "movq       %%xmm15,(%1,%4)                  \n"
-  "lea        (%1,%4,2),%1                     \n"
-  "jg         1b                               \n"
-  : "+r"(src),    // %0
-    "+r"(dst),    // %1
-    "+r"(width)   // %2
-  : "r"((intptr_t)(src_stride)),  // %3
-    "r"((intptr_t)(dst_stride))   // %4
-  : "memory", "cc",
-    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
-    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13",  "xmm14",  "xmm15"
-);
-}
-
-#define HAS_TRANSPOSE_UVWX8_SSE2
-static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
-                                uint8* dst_a, int dst_stride_a,
-                                uint8* dst_b, int dst_stride_b,
-                                int w) {
-  asm volatile (
-  // Read in the data from the source pointer.
-  // First round of bit swap.
-  ".p2align  2                                 \n"
-"1:                                            \n"
-  "movdqa     (%0),%%xmm0                      \n"
-  "movdqa     (%0,%4),%%xmm1                   \n"
-  "lea        (%0,%4,2),%0                     \n"
-  "movdqa     %%xmm0,%%xmm8                    \n"
-  "punpcklbw  %%xmm1,%%xmm0                    \n"
-  "punpckhbw  %%xmm1,%%xmm8                    \n"
-  "movdqa     %%xmm8,%%xmm1                    \n"
-  "movdqa     (%0),%%xmm2                      \n"
-  "movdqa     (%0,%4),%%xmm3                   \n"
-  "lea        (%0,%4,2),%0                     \n"
-  "movdqa     %%xmm2,%%xmm8                    \n"
-  "punpcklbw  %%xmm3,%%xmm2                    \n"
-  "punpckhbw  %%xmm3,%%xmm8                    \n"
-  "movdqa     %%xmm8,%%xmm3                    \n"
-  "movdqa     (%0),%%xmm4                      \n"
-  "movdqa     (%0,%4),%%xmm5                   \n"
-  "lea        (%0,%4,2),%0                     \n"
-  "movdqa     %%xmm4,%%xmm8                    \n"
-  "punpcklbw  %%xmm5,%%xmm4                    \n"
-  "punpckhbw  %%xmm5,%%xmm8                    \n"
-  "movdqa     %%xmm8,%%xmm5                    \n"
-  "movdqa     (%0),%%xmm6                      \n"
-  "movdqa     (%0,%4),%%xmm7                   \n"
-  "lea        (%0,%4,2),%0                     \n"
-  "movdqa     %%xmm6,%%xmm8                    \n"
-  "punpcklbw  %%xmm7,%%xmm6                    \n"
-  "neg        %4                               \n"
-  "lea        0x10(%0,%4,8),%0                 \n"
-  "punpckhbw  %%xmm7,%%xmm8                    \n"
-  "movdqa     %%xmm8,%%xmm7                    \n"
-  "neg        %4                               \n"
-   // Second round of bit swap.
-  "movdqa     %%xmm0,%%xmm8                    \n"
-  "movdqa     %%xmm1,%%xmm9                    \n"
-  "punpckhwd  %%xmm2,%%xmm8                    \n"
-  "punpckhwd  %%xmm3,%%xmm9                    \n"
-  "punpcklwd  %%xmm2,%%xmm0                    \n"
-  "punpcklwd  %%xmm3,%%xmm1                    \n"
-  "movdqa     %%xmm8,%%xmm2                    \n"
-  "movdqa     %%xmm9,%%xmm3                    \n"
-  "movdqa     %%xmm4,%%xmm8                    \n"
-  "movdqa     %%xmm5,%%xmm9                    \n"
-  "punpckhwd  %%xmm6,%%xmm8                    \n"
-  "punpckhwd  %%xmm7,%%xmm9                    \n"
-  "punpcklwd  %%xmm6,%%xmm4                    \n"
-  "punpcklwd  %%xmm7,%%xmm5                    \n"
-  "movdqa     %%xmm8,%%xmm6                    \n"
-  "movdqa     %%xmm9,%%xmm7                    \n"
-  // Third round of bit swap.
-  // Write to the destination pointer.
-  "movdqa     %%xmm0,%%xmm8                    \n"
-  "punpckldq  %%xmm4,%%xmm0                    \n"
-  "movlpd     %%xmm0,(%1)                      \n"  // Write back U channel
-  "movhpd     %%xmm0,(%2)                      \n"  // Write back V channel
-  "punpckhdq  %%xmm4,%%xmm8                    \n"
-  "movlpd     %%xmm8,(%1,%5)                   \n"
-  "lea        (%1,%5,2),%1                     \n"
-  "movhpd     %%xmm8,(%2,%6)                   \n"
-  "lea        (%2,%6,2),%2                     \n"
-  "movdqa     %%xmm2,%%xmm8                    \n"
-  "punpckldq  %%xmm6,%%xmm2                    \n"
-  "movlpd     %%xmm2,(%1)                      \n"
-  "movhpd     %%xmm2,(%2)                      \n"
-  "punpckhdq  %%xmm6,%%xmm8                    \n"
-  "movlpd     %%xmm8,(%1,%5)                   \n"
-  "lea        (%1,%5,2),%1                     \n"
-  "movhpd     %%xmm8,(%2,%6)                   \n"
-  "lea        (%2,%6,2),%2                     \n"
-  "movdqa     %%xmm1,%%xmm8                    \n"
-  "punpckldq  %%xmm5,%%xmm1                    \n"
-  "movlpd     %%xmm1,(%1)                      \n"
-  "movhpd     %%xmm1,(%2)                      \n"
-  "punpckhdq  %%xmm5,%%xmm8                    \n"
-  "movlpd     %%xmm8,(%1,%5)                   \n"
-  "lea        (%1,%5,2),%1                     \n"
-  "movhpd     %%xmm8,(%2,%6)                   \n"
-  "lea        (%2,%6,2),%2                     \n"
-  "movdqa     %%xmm3,%%xmm8                    \n"
-  "punpckldq  %%xmm7,%%xmm3                    \n"
-  "movlpd     %%xmm3,(%1)                      \n"
-  "movhpd     %%xmm3,(%2)                      \n"
-  "punpckhdq  %%xmm7,%%xmm8                    \n"
-  "sub        $0x8,%3                          \n"
-  "movlpd     %%xmm8,(%1,%5)                   \n"
-  "lea        (%1,%5,2),%1                     \n"
-  "movhpd     %%xmm8,(%2,%6)                   \n"
-  "lea        (%2,%6,2),%2                     \n"
-  "jg         1b                               \n"
-  : "+r"(src),    // %0
-    "+r"(dst_a),  // %1
-    "+r"(dst_b),  // %2
-    "+r"(w)   // %3
-  : "r"((intptr_t)(src_stride)),    // %4
-    "r"((intptr_t)(dst_stride_a)),  // %5
-    "r"((intptr_t)(dst_stride_b))   // %6
-  : "memory", "cc",
-    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
-    "xmm8", "xmm9"
-);
-}
-#endif
-#endif
-
-static void TransposeWx8_C(const uint8* src, int src_stride,
-                           uint8* dst, int dst_stride,
-                           int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    dst[0] = src[0 * src_stride];
-    dst[1] = src[1 * src_stride];
-    dst[2] = src[2 * src_stride];
-    dst[3] = src[3 * src_stride];
-    dst[4] = src[4 * src_stride];
-    dst[5] = src[5 * src_stride];
-    dst[6] = src[6 * src_stride];
-    dst[7] = src[7 * src_stride];
-    ++src;
-    dst += dst_stride;
-  }
-}
-
-static void TransposeWxH_C(const uint8* src, int src_stride,
-                           uint8* dst, int dst_stride,
-                           int width, int height) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int j;
-    for (j = 0; j < height; ++j) {
-      dst[i * dst_stride + j] = src[j * src_stride + i];
-    }
-  }
-}
-
-LIBYUV_API
-void TransposePlane(const uint8* src, int src_stride,
-                    uint8* dst, int dst_stride,
-                    int width, int height) {
-  int i = height;
-  void (*TransposeWx8)(const uint8* src, int src_stride,
-                       uint8* dst, int dst_stride,
-                       int width) = TransposeWx8_C;
-#if defined(HAS_TRANSPOSE_WX8_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    TransposeWx8 = TransposeWx8_NEON;
-  }
-#endif
-#if defined(HAS_TRANSPOSE_WX8_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
-    TransposeWx8 = TransposeWx8_SSSE3;
-  }
-#endif
-#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
-      IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
-    TransposeWx8 = TransposeWx8_FAST_SSSE3;
-  }
-#endif
-#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
-    if (IS_ALIGNED(width, 4) &&
-        IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
-      TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
-    } else {
-      TransposeWx8 = TransposeWx8_MIPS_DSPR2;
-    }
-  }
-#endif
-
-  // Work across the source in 8x8 tiles
-  while (i >= 8) {
-    TransposeWx8(src, src_stride, dst, dst_stride, width);
-    src += 8 * src_stride;    // Go down 8 rows.
-    dst += 8;                 // Move over 8 columns.
-    i -= 8;
-  }
-
-  TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
-}
-
-LIBYUV_API
-void RotatePlane90(const uint8* src, int src_stride,
-                   uint8* dst, int dst_stride,
-                   int width, int height) {
-  // Rotate by 90 is a transpose with the source read
-  // from bottom to top. So set the source pointer to the end
-  // of the buffer and flip the sign of the source stride.
-  src += src_stride * (height - 1);
-  src_stride = -src_stride;
-  TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane270(const uint8* src, int src_stride,
-                    uint8* dst, int dst_stride,
-                    int width, int height) {
-  // Rotate by 270 is a transpose with the destination written
-  // from bottom to top. So set the destination pointer to the end
-  // of the buffer and flip the sign of the destination stride.
-  dst += dst_stride * (width - 1);
-  dst_stride = -dst_stride;
-  TransposePlane(src, src_stride, dst, dst_stride, width, height);
-}
-
-LIBYUV_API
-void RotatePlane180(const uint8* src, int src_stride,
-                    uint8* dst, int dst_stride,
-                    int width, int height) {
-  // Swap first and last row and mirror the content. Uses a temporary row.
-  align_buffer_64(row, width);
-  const uint8* src_bot = src + src_stride * (height - 1);
-  uint8* dst_bot = dst + dst_stride * (height - 1);
-  int half_height = (height + 1) >> 1;
-  int y;
-  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
-  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_MIRRORROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    MirrorRow = MirrorRow_NEON;
-  }
-#endif
-#if defined(HAS_MIRRORROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    MirrorRow = MirrorRow_SSE2;
-  }
-#endif
-#if defined(HAS_MIRRORROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    MirrorRow = MirrorRow_SSSE3;
-  }
-#endif
-#if defined(HAS_MIRRORROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
-    MirrorRow = MirrorRow_AVX2;
-  }
-#endif
-#if defined(HAS_MIRRORROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
-      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
-      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
-    MirrorRow = MirrorRow_MIPS_DSPR2;
-  }
-#endif
-#if defined(HAS_COPYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
-    CopyRow = CopyRow_NEON;
-  }
-#endif
-#if defined(HAS_COPYROW_X86)
-  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
-    CopyRow = CopyRow_X86;
-  }
-#endif
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    CopyRow = CopyRow_SSE2;
-  }
-#endif
-#if defined(HAS_COPYROW_ERMS)
-  if (TestCpuFlag(kCpuHasERMS)) {
-    CopyRow = CopyRow_ERMS;
-  }
-#endif
-#if defined(HAS_COPYROW_MIPS)
-  if (TestCpuFlag(kCpuHasMIPS)) {
-    CopyRow = CopyRow_MIPS;
-  }
-#endif
-
-  // Odd height will harmlessly mirror the middle row twice.
-  for (y = 0; y < half_height; ++y) {
-    MirrorRow(src, row, width);  // Mirror first row into a buffer
-    src += src_stride;
-    MirrorRow(src_bot, dst, width);  // Mirror last row into first row
-    dst += dst_stride;
-    CopyRow(row, dst_bot, width);  // Copy first mirrored row into last
-    src_bot -= src_stride;
-    dst_bot -= dst_stride;
-  }
-  free_aligned_buffer_64(row);
-}
-
-static void TransposeUVWx8_C(const uint8* src, int src_stride,
-                             uint8* dst_a, int dst_stride_a,
-                             uint8* dst_b, int dst_stride_b,
-                             int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    dst_a[0] = src[0 * src_stride + 0];
-    dst_b[0] = src[0 * src_stride + 1];
-    dst_a[1] = src[1 * src_stride + 0];
-    dst_b[1] = src[1 * src_stride + 1];
-    dst_a[2] = src[2 * src_stride + 0];
-    dst_b[2] = src[2 * src_stride + 1];
-    dst_a[3] = src[3 * src_stride + 0];
-    dst_b[3] = src[3 * src_stride + 1];
-    dst_a[4] = src[4 * src_stride + 0];
-    dst_b[4] = src[4 * src_stride + 1];
-    dst_a[5] = src[5 * src_stride + 0];
-    dst_b[5] = src[5 * src_stride + 1];
-    dst_a[6] = src[6 * src_stride + 0];
-    dst_b[6] = src[6 * src_stride + 1];
-    dst_a[7] = src[7 * src_stride + 0];
-    dst_b[7] = src[7 * src_stride + 1];
-    src += 2;
-    dst_a += dst_stride_a;
-    dst_b += dst_stride_b;
-  }
-}
-
-static void TransposeUVWxH_C(const uint8* src, int src_stride,
-                             uint8* dst_a, int dst_stride_a,
-                             uint8* dst_b, int dst_stride_b,
-                             int width, int height) {
-  int i;
-  for (i = 0; i < width * 2; i += 2) {
-    int j;
-    for (j = 0; j < height; ++j) {
-      dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
-      dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
-    }
-  }
-}
-
-LIBYUV_API
-void TransposeUV(const uint8* src, int src_stride,
-                 uint8* dst_a, int dst_stride_a,
-                 uint8* dst_b, int dst_stride_b,
-                 int width, int height) {
-  int i = height;
-  void (*TransposeUVWx8)(const uint8* src, int src_stride,
-                         uint8* dst_a, int dst_stride_a,
-                         uint8* dst_b, int dst_stride_b,
-                         int width) = TransposeUVWx8_C;
-#if defined(HAS_TRANSPOSE_UVWX8_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    TransposeUVWx8 = TransposeUVWx8_NEON;
-  }
-#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 8) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
-    TransposeUVWx8 = TransposeUVWx8_SSE2;
-  }
-#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
-      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
-    TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
-  }
-#endif
-
-  // Work through the source in 8x8 tiles.
-  while (i >= 8) {
-    TransposeUVWx8(src, src_stride,
-                   dst_a, dst_stride_a,
-                   dst_b, dst_stride_b,
-                   width);
-    src += 8 * src_stride;    // Go down 8 rows.
-    dst_a += 8;               // Move over 8 columns.
-    dst_b += 8;               // Move over 8 columns.
-    i -= 8;
-  }
-
-  TransposeUVWxH_C(src, src_stride,
-                   dst_a, dst_stride_a,
-                   dst_b, dst_stride_b,
-                   width, i);
-}
-
-LIBYUV_API
-void RotateUV90(const uint8* src, int src_stride,
-                uint8* dst_a, int dst_stride_a,
-                uint8* dst_b, int dst_stride_b,
-                int width, int height) {
-  src += src_stride * (height - 1);
-  src_stride = -src_stride;
-
-  TransposeUV(src, src_stride,
-              dst_a, dst_stride_a,
-              dst_b, dst_stride_b,
-              width, height);
-}
-
-LIBYUV_API
-void RotateUV270(const uint8* src, int src_stride,
-                 uint8* dst_a, int dst_stride_a,
-                 uint8* dst_b, int dst_stride_b,
-                 int width, int height) {
-  dst_a += dst_stride_a * (width - 1);
-  dst_b += dst_stride_b * (width - 1);
-  dst_stride_a = -dst_stride_a;
-  dst_stride_b = -dst_stride_b;
-
-  TransposeUV(src, src_stride,
-              dst_a, dst_stride_a,
-              dst_b, dst_stride_b,
-              width, height);
-}
-
-// Rotate 180 is a horizontal and vertical flip.
-LIBYUV_API
-void RotateUV180(const uint8* src, int src_stride,
-                 uint8* dst_a, int dst_stride_a,
-                 uint8* dst_b, int dst_stride_b,
-                 int width, int height) {
-  int i;
-  void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
-      MirrorUVRow_C;
-#if defined(HAS_MIRRORUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    MirrorRowUV = MirrorUVRow_NEON;
-  }
-#elif defined(HAS_MIRRORROW_UV_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
-    MirrorRowUV = MirrorUVRow_SSSE3;
-  }
-#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
-      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
-    MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
-  }
-#endif
-
-  dst_a += dst_stride_a * (height - 1);
-  dst_b += dst_stride_b * (height - 1);
-
-  for (i = 0; i < height; ++i) {
-    MirrorRowUV(src, dst_a, dst_b, width);
-    src += src_stride;
-    dst_a -= dst_stride_a;
-    dst_b -= dst_stride_b;
-  }
-}
-
-LIBYUV_API
-int RotatePlane(const uint8* src, int src_stride,
-                uint8* dst, int dst_stride,
-                int width, int height,
-                enum RotationMode mode) {
-  if (!src || width <= 0 || height == 0 || !dst) {
-    return -1;
-  }
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src = src + (height - 1) * src_stride;
-    src_stride = -src_stride;
-  }
-
-  switch (mode) {
-    case kRotate0:
-      // copy frame
-      CopyPlane(src, src_stride,
-                dst, dst_stride,
-                width, height);
-      return 0;
-    case kRotate90:
-      RotatePlane90(src, src_stride,
-                    dst, dst_stride,
-                    width, height);
-      return 0;
-    case kRotate270:
-      RotatePlane270(src, src_stride,
-                     dst, dst_stride,
-                     width, height);
-      return 0;
-    case kRotate180:
-      RotatePlane180(src, src_stride,
-                     dst, dst_stride,
-                     width, height);
-      return 0;
-    default:
-      break;
-  }
-  return -1;
-}
-
-LIBYUV_API
-int I420Rotate(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_y, int dst_stride_y,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height,
-               enum RotationMode mode) {
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
-      !dst_y || !dst_u || !dst_v) {
-    return -1;
-  }
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_u = src_u + (halfheight - 1) * src_stride_u;
-    src_v = src_v + (halfheight - 1) * src_stride_v;
-    src_stride_y = -src_stride_y;
-    src_stride_u = -src_stride_u;
-    src_stride_v = -src_stride_v;
-  }
-
-  switch (mode) {
-    case kRotate0:
-      // copy frame
-      return I420Copy(src_y, src_stride_y,
-                      src_u, src_stride_u,
-                      src_v, src_stride_v,
-                      dst_y, dst_stride_y,
-                      dst_u, dst_stride_u,
-                      dst_v, dst_stride_v,
-                      width, height);
-    case kRotate90:
-      RotatePlane90(src_y, src_stride_y,
-                    dst_y, dst_stride_y,
-                    width, height);
-      RotatePlane90(src_u, src_stride_u,
-                    dst_u, dst_stride_u,
-                    halfwidth, halfheight);
-      RotatePlane90(src_v, src_stride_v,
-                    dst_v, dst_stride_v,
-                    halfwidth, halfheight);
-      return 0;
-    case kRotate270:
-      RotatePlane270(src_y, src_stride_y,
-                     dst_y, dst_stride_y,
-                     width, height);
-      RotatePlane270(src_u, src_stride_u,
-                     dst_u, dst_stride_u,
-                     halfwidth, halfheight);
-      RotatePlane270(src_v, src_stride_v,
-                     dst_v, dst_stride_v,
-                     halfwidth, halfheight);
-      return 0;
-    case kRotate180:
-      RotatePlane180(src_y, src_stride_y,
-                     dst_y, dst_stride_y,
-                     width, height);
-      RotatePlane180(src_u, src_stride_u,
-                     dst_u, dst_stride_u,
-                     halfwidth, halfheight);
-      RotatePlane180(src_v, src_stride_v,
-                     dst_v, dst_stride_v,
-                     halfwidth, halfheight);
-      return 0;
-    default:
-      break;
-  }
-  return -1;
-}
-
-LIBYUV_API
-int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
-                     const uint8* src_uv, int src_stride_uv,
-                     uint8* dst_y, int dst_stride_y,
-                     uint8* dst_u, int dst_stride_u,
-                     uint8* dst_v, int dst_stride_v,
-                     int width, int height,
-                     enum RotationMode mode) {
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
-  if (!src_y || !src_uv || width <= 0 || height == 0 ||
-      !dst_y || !dst_u || !dst_v) {
-    return -1;
-  }
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    src_y = src_y + (height - 1) * src_stride_y;
-    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
-    src_stride_y = -src_stride_y;
-    src_stride_uv = -src_stride_uv;
-  }
-
-  switch (mode) {
-    case kRotate0:
-      // copy frame
-      return NV12ToI420(src_y, src_stride_y,
-                        src_uv, src_stride_uv,
-                        dst_y, dst_stride_y,
-                        dst_u, dst_stride_u,
-                        dst_v, dst_stride_v,
-                        width, height);
-    case kRotate90:
-      RotatePlane90(src_y, src_stride_y,
-                    dst_y, dst_stride_y,
-                    width, height);
-      RotateUV90(src_uv, src_stride_uv,
-                 dst_u, dst_stride_u,
-                 dst_v, dst_stride_v,
-                 halfwidth, halfheight);
-      return 0;
-    case kRotate270:
-      RotatePlane270(src_y, src_stride_y,
-                     dst_y, dst_stride_y,
-                     width, height);
-      RotateUV270(src_uv, src_stride_uv,
-                  dst_u, dst_stride_u,
-                  dst_v, dst_stride_v,
-                  halfwidth, halfheight);
-      return 0;
-    case kRotate180:
-      RotatePlane180(src_y, src_stride_y,
-                     dst_y, dst_stride_y,
-                     width, height);
-      RotateUV180(src_uv, src_stride_uv,
-                  dst_u, dst_stride_u,
-                  dst_v, dst_stride_v,
-                  halfwidth, halfheight);
-      return 0;
-    default:
-      break;
-  }
-  return -1;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc
deleted file mode 100755
index ab0f9ce070..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/rotate.h"
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/convert.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// ARGBScale has a function to copy pixels to a row, striding each source
-// pixel by a constant.
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || \
-    (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
-#define HAS_SCALEARGBROWDOWNEVEN_SSE2
-void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
-                               int src_stepx,
-                               uint8* dst_ptr, int dst_width);
-#endif
-#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_SCALEARGBROWDOWNEVEN_NEON
-void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
-                               int src_stepx,
-                               uint8* dst_ptr, int dst_width);
-#endif
-
-void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
-                            int src_stepx,
-                            uint8* dst_ptr, int dst_width);
-
-static void ARGBTranspose(const uint8* src, int src_stride,
-                          uint8* dst, int dst_stride,
-                          int width, int height) {
-  int i;
-  int src_pixel_step = src_stride >> 2;
-  void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
-      int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
-#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) &&  // Width of dest.
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
-  }
-#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) &&  // Width of dest.
-      IS_ALIGNED(src, 4)) {
-    ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
-  }
-#endif
-
-  for (i = 0; i < width; ++i) {  // column of source to row of dest.
-    ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
-    dst += dst_stride;
-    src += 4;
-  }
-}
-
-void ARGBRotate90(const uint8* src, int src_stride,
-                  uint8* dst, int dst_stride,
-                  int width, int height) {
-  // Rotate by 90 is a ARGBTranspose with the source read
-  // from bottom to top. So set the source pointer to the end
-  // of the buffer and flip the sign of the source stride.
-  src += src_stride * (height - 1);
-  src_stride = -src_stride;
-  ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
-}
-
-void ARGBRotate270(const uint8* src, int src_stride,
-                    uint8* dst, int dst_stride,
-                    int width, int height) {
-  // Rotate by 270 is a ARGBTranspose with the destination written
-  // from bottom to top. So set the destination pointer to the end
-  // of the buffer and flip the sign of the destination stride.
-  dst += dst_stride * (width - 1);
-  dst_stride = -dst_stride;
-  ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
-}
-
-void ARGBRotate180(const uint8* src, int src_stride,
-                   uint8* dst, int dst_stride,
-                   int width, int height) {
-  // Swap first and last row and mirror the content. Uses a temporary row.
-  align_buffer_64(row, width * 4);
-  const uint8* src_bot = src + src_stride * (height - 1);
-  uint8* dst_bot = dst + dst_stride * (height - 1);
-  int half_height = (height + 1) >> 1;
-  int y;
-  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
-      ARGBMirrorRow_C;
-  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_ARGBMIRRORROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
-  }
-#endif
-#if defined(HAS_ARGBMIRRORROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
-    ARGBMirrorRow = ARGBMirrorRow_AVX2;
-  }
-#endif
-#if defined(HAS_ARGBMIRRORROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
-    ARGBMirrorRow = ARGBMirrorRow_NEON;
-  }
-#endif
-#if defined(HAS_COPYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
-    CopyRow = CopyRow_NEON;
-  }
-#endif
-#if defined(HAS_COPYROW_X86)
-  if (TestCpuFlag(kCpuHasX86)) {
-    CopyRow = CopyRow_X86;
-  }
-#endif
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    CopyRow = CopyRow_SSE2;
-  }
-#endif
-#if defined(HAS_COPYROW_ERMS)
-  if (TestCpuFlag(kCpuHasERMS)) {
-    CopyRow = CopyRow_ERMS;
-  }
-#endif
-#if defined(HAS_COPYROW_MIPS)
-  if (TestCpuFlag(kCpuHasMIPS)) {
-    CopyRow = CopyRow_MIPS;
-  }
-#endif
-
-  // Odd height will harmlessly mirror the middle row twice.
-  for (y = 0; y < half_height; ++y) {
-    ARGBMirrorRow(src, row, width);  // Mirror first row into a buffer
-    ARGBMirrorRow(src_bot, dst, width);  // Mirror last row into first row
-    CopyRow(row, dst_bot, width * 4);  // Copy first mirrored row into last
-    src += src_stride;
-    dst += dst_stride;
-    src_bot -= src_stride;
-    dst_bot -= dst_stride;
-  }
-  free_aligned_buffer_64(row);
-}
-
-LIBYUV_API
-int ARGBRotate(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height,
-               enum RotationMode mode) {
-  if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
-    return -1;
-  }
-
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb = src_argb + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-
-  switch (mode) {
-    case kRotate0:
-      // copy frame
-      return ARGBCopy(src_argb, src_stride_argb,
-                      dst_argb, dst_stride_argb,
-                      width, height);
-    case kRotate90:
-      ARGBRotate90(src_argb, src_stride_argb,
-                   dst_argb, dst_stride_argb,
-                   width, height);
-      return 0;
-    case kRotate270:
-      ARGBRotate270(src_argb, src_stride_argb,
-                    dst_argb, dst_stride_argb,
-                    width, height);
-      return 0;
-    case kRotate180:
-      ARGBRotate180(src_argb, src_stride_argb,
-                    dst_argb, dst_stride_argb,
-                    width, height);
-      return 0;
-    default:
-      break;
-  }
-  return -1;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc
deleted file mode 100755
index 04d5a663f7..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc
+++ /dev/null
@@ -1,486 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_MIPS) && \
-    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
-                             uint8* dst, int dst_stride,
-                             int width) {
-   __asm__ __volatile__ (
-      ".set push                                         \n"
-      ".set noreorder                                    \n"
-      "sll              $t2, %[src_stride], 0x1          \n" // src_stride x 2
-      "sll              $t4, %[src_stride], 0x2          \n" // src_stride x 4
-      "sll              $t9, %[src_stride], 0x3          \n" // src_stride x 8
-      "addu             $t3, $t2, %[src_stride]          \n"
-      "addu             $t5, $t4, %[src_stride]          \n"
-      "addu             $t6, $t2, $t4                    \n"
-      "andi             $t0, %[dst], 0x3                 \n"
-      "andi             $t1, %[dst_stride], 0x3          \n"
-      "or               $t0, $t0, $t1                    \n"
-      "bnez             $t0, 11f                         \n"
-      " subu            $t7, $t9, %[src_stride]          \n"
-//dst + dst_stride word aligned
-    "1:                                                  \n"
-      "lbu              $t0, 0(%[src])                   \n"
-      "lbux             $t1, %[src_stride](%[src])       \n"
-      "lbux             $t8, $t2(%[src])                 \n"
-      "lbux             $t9, $t3(%[src])                 \n"
-      "sll              $t1, $t1, 16                     \n"
-      "sll              $t9, $t9, 16                     \n"
-      "or               $t0, $t0, $t1                    \n"
-      "or               $t8, $t8, $t9                    \n"
-      "precr.qb.ph      $s0, $t8, $t0                    \n"
-      "lbux             $t0, $t4(%[src])                 \n"
-      "lbux             $t1, $t5(%[src])                 \n"
-      "lbux             $t8, $t6(%[src])                 \n"
-      "lbux             $t9, $t7(%[src])                 \n"
-      "sll              $t1, $t1, 16                     \n"
-      "sll              $t9, $t9, 16                     \n"
-      "or               $t0, $t0, $t1                    \n"
-      "or               $t8, $t8, $t9                    \n"
-      "precr.qb.ph      $s1, $t8, $t0                    \n"
-      "sw               $s0, 0(%[dst])                   \n"
-      "addiu            %[width], -1                     \n"
-      "addiu            %[src], 1                        \n"
-      "sw               $s1, 4(%[dst])                   \n"
-      "bnez             %[width], 1b                     \n"
-      " addu            %[dst], %[dst], %[dst_stride]    \n"
-      "b                2f                               \n"
-//dst + dst_stride unaligned
-   "11:                                                  \n"
-      "lbu              $t0, 0(%[src])                   \n"
-      "lbux             $t1, %[src_stride](%[src])       \n"
-      "lbux             $t8, $t2(%[src])                 \n"
-      "lbux             $t9, $t3(%[src])                 \n"
-      "sll              $t1, $t1, 16                     \n"
-      "sll              $t9, $t9, 16                     \n"
-      "or               $t0, $t0, $t1                    \n"
-      "or               $t8, $t8, $t9                    \n"
-      "precr.qb.ph      $s0, $t8, $t0                    \n"
-      "lbux             $t0, $t4(%[src])                 \n"
-      "lbux             $t1, $t5(%[src])                 \n"
-      "lbux             $t8, $t6(%[src])                 \n"
-      "lbux             $t9, $t7(%[src])                 \n"
-      "sll              $t1, $t1, 16                     \n"
-      "sll              $t9, $t9, 16                     \n"
-      "or               $t0, $t0, $t1                    \n"
-      "or               $t8, $t8, $t9                    \n"
-      "precr.qb.ph      $s1, $t8, $t0                    \n"
-      "swr              $s0, 0(%[dst])                   \n"
-      "swl              $s0, 3(%[dst])                   \n"
-      "addiu            %[width], -1                     \n"
-      "addiu            %[src], 1                        \n"
-      "swr              $s1, 4(%[dst])                   \n"
-      "swl              $s1, 7(%[dst])                   \n"
-      "bnez             %[width], 11b                    \n"
-       "addu             %[dst], %[dst], %[dst_stride]   \n"
-    "2:                                                  \n"
-      ".set pop                                          \n"
-      :[src] "+r" (src),
-       [dst] "+r" (dst),
-       [width] "+r" (width)
-      :[src_stride] "r" (src_stride),
-       [dst_stride] "r" (dst_stride)
-      : "t0", "t1",  "t2", "t3", "t4", "t5",
-        "t6", "t7", "t8", "t9",
-        "s0", "s1"
-  );
-}
-
-void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
-                                  uint8* dst, int dst_stride,
-                                  int width) {
-  __asm__ __volatile__ (
-      ".set noat                                         \n"
-      ".set push                                         \n"
-      ".set noreorder                                    \n"
-      "beqz             %[width], 2f                     \n"
-      " sll             $t2, %[src_stride], 0x1          \n"  // src_stride x 2
-      "sll              $t4, %[src_stride], 0x2          \n"  // src_stride x 4
-      "sll              $t9, %[src_stride], 0x3          \n"  // src_stride x 8
-      "addu             $t3, $t2, %[src_stride]          \n"
-      "addu             $t5, $t4, %[src_stride]          \n"
-      "addu             $t6, $t2, $t4                    \n"
-
-      "srl              $AT, %[width], 0x2               \n"
-      "andi             $t0, %[dst], 0x3                 \n"
-      "andi             $t1, %[dst_stride], 0x3          \n"
-      "or               $t0, $t0, $t1                    \n"
-      "bnez             $t0, 11f                         \n"
-      " subu            $t7, $t9, %[src_stride]          \n"
-//dst + dst_stride word aligned
-      "1:                                                \n"
-      "lw               $t0, 0(%[src])                   \n"
-      "lwx              $t1, %[src_stride](%[src])       \n"
-      "lwx              $t8, $t2(%[src])                 \n"
-      "lwx              $t9, $t3(%[src])                 \n"
-
-// t0 = | 30 | 20 | 10 | 00 |
-// t1 = | 31 | 21 | 11 | 01 |
-// t8 = | 32 | 22 | 12 | 02 |
-// t9 = | 33 | 23 | 13 | 03 |
-
-      "precr.qb.ph     $s0, $t1, $t0                     \n"
-      "precr.qb.ph     $s1, $t9, $t8                     \n"
-      "precrq.qb.ph    $s2, $t1, $t0                     \n"
-      "precrq.qb.ph    $s3, $t9, $t8                     \n"
-
-  // s0 = | 21 | 01 | 20 | 00 |
-  // s1 = | 23 | 03 | 22 | 02 |
-  // s2 = | 31 | 11 | 30 | 10 |
-  // s3 = | 33 | 13 | 32 | 12 |
-
-      "precr.qb.ph     $s4, $s1, $s0                     \n"
-      "precrq.qb.ph    $s5, $s1, $s0                     \n"
-      "precr.qb.ph     $s6, $s3, $s2                     \n"
-      "precrq.qb.ph    $s7, $s3, $s2                     \n"
-
-  // s4 = | 03 | 02 | 01 | 00 |
-  // s5 = | 23 | 22 | 21 | 20 |
-  // s6 = | 13 | 12 | 11 | 10 |
-  // s7 = | 33 | 32 | 31 | 30 |
-
-      "lwx              $t0, $t4(%[src])                 \n"
-      "lwx              $t1, $t5(%[src])                 \n"
-      "lwx              $t8, $t6(%[src])                 \n"
-      "lwx              $t9, $t7(%[src])                 \n"
-
-// t0 = | 34 | 24 | 14 | 04 |
-// t1 = | 35 | 25 | 15 | 05 |
-// t8 = | 36 | 26 | 16 | 06 |
-// t9 = | 37 | 27 | 17 | 07 |
-
-      "precr.qb.ph     $s0, $t1, $t0                     \n"
-      "precr.qb.ph     $s1, $t9, $t8                     \n"
-      "precrq.qb.ph    $s2, $t1, $t0                     \n"
-      "precrq.qb.ph    $s3, $t9, $t8                     \n"
-
-  // s0 = | 25 | 05 | 24 | 04 |
-  // s1 = | 27 | 07 | 26 | 06 |
-  // s2 = | 35 | 15 | 34 | 14 |
-  // s3 = | 37 | 17 | 36 | 16 |
-
-      "precr.qb.ph     $t0, $s1, $s0                     \n"
-      "precrq.qb.ph    $t1, $s1, $s0                     \n"
-      "precr.qb.ph     $t8, $s3, $s2                     \n"
-      "precrq.qb.ph    $t9, $s3, $s2                     \n"
-
-  // t0 = | 07 | 06 | 05 | 04 |
-  // t1 = | 27 | 26 | 25 | 24 |
-  // t8 = | 17 | 16 | 15 | 14 |
-  // t9 = | 37 | 36 | 35 | 34 |
-
-      "addu            $s0, %[dst], %[dst_stride]        \n"
-      "addu            $s1, $s0, %[dst_stride]           \n"
-      "addu            $s2, $s1, %[dst_stride]           \n"
-
-      "sw              $s4, 0(%[dst])                    \n"
-      "sw              $t0, 4(%[dst])                    \n"
-      "sw              $s6, 0($s0)                       \n"
-      "sw              $t8, 4($s0)                       \n"
-      "sw              $s5, 0($s1)                       \n"
-      "sw              $t1, 4($s1)                       \n"
-      "sw              $s7, 0($s2)                       \n"
-      "sw              $t9, 4($s2)                       \n"
-
-      "addiu            $AT, -1                          \n"
-      "addiu            %[src], 4                        \n"
-
-      "bnez             $AT, 1b                          \n"
-      " addu            %[dst], $s2, %[dst_stride]       \n"
-      "b                2f                               \n"
-//dst + dst_stride unaligned
-      "11:                                               \n"
-      "lw               $t0, 0(%[src])                   \n"
-      "lwx              $t1, %[src_stride](%[src])       \n"
-      "lwx              $t8, $t2(%[src])                 \n"
-      "lwx              $t9, $t3(%[src])                 \n"
-
-// t0 = | 30 | 20 | 10 | 00 |
-// t1 = | 31 | 21 | 11 | 01 |
-// t8 = | 32 | 22 | 12 | 02 |
-// t9 = | 33 | 23 | 13 | 03 |
-
-      "precr.qb.ph     $s0, $t1, $t0                     \n"
-      "precr.qb.ph     $s1, $t9, $t8                     \n"
-      "precrq.qb.ph    $s2, $t1, $t0                     \n"
-      "precrq.qb.ph    $s3, $t9, $t8                     \n"
-
-  // s0 = | 21 | 01 | 20 | 00 |
-  // s1 = | 23 | 03 | 22 | 02 |
-  // s2 = | 31 | 11 | 30 | 10 |
-  // s3 = | 33 | 13 | 32 | 12 |
-
-      "precr.qb.ph     $s4, $s1, $s0                     \n"
-      "precrq.qb.ph    $s5, $s1, $s0                     \n"
-      "precr.qb.ph     $s6, $s3, $s2                     \n"
-      "precrq.qb.ph    $s7, $s3, $s2                     \n"
-
-  // s4 = | 03 | 02 | 01 | 00 |
-  // s5 = | 23 | 22 | 21 | 20 |
-  // s6 = | 13 | 12 | 11 | 10 |
-  // s7 = | 33 | 32 | 31 | 30 |
-
-      "lwx              $t0, $t4(%[src])                 \n"
-      "lwx              $t1, $t5(%[src])                 \n"
-      "lwx              $t8, $t6(%[src])                 \n"
-      "lwx              $t9, $t7(%[src])                 \n"
-
-// t0 = | 34 | 24 | 14 | 04 |
-// t1 = | 35 | 25 | 15 | 05 |
-// t8 = | 36 | 26 | 16 | 06 |
-// t9 = | 37 | 27 | 17 | 07 |
-
-      "precr.qb.ph     $s0, $t1, $t0                     \n"
-      "precr.qb.ph     $s1, $t9, $t8                     \n"
-      "precrq.qb.ph    $s2, $t1, $t0                     \n"
-      "precrq.qb.ph    $s3, $t9, $t8                     \n"
-
-  // s0 = | 25 | 05 | 24 | 04 |
-  // s1 = | 27 | 07 | 26 | 06 |
-  // s2 = | 35 | 15 | 34 | 14 |
-  // s3 = | 37 | 17 | 36 | 16 |
-
-      "precr.qb.ph     $t0, $s1, $s0                     \n"
-      "precrq.qb.ph    $t1, $s1, $s0                     \n"
-      "precr.qb.ph     $t8, $s3, $s2                     \n"
-      "precrq.qb.ph    $t9, $s3, $s2                     \n"
-
-  // t0 = | 07 | 06 | 05 | 04 |
-  // t1 = | 27 | 26 | 25 | 24 |
-  // t8 = | 17 | 16 | 15 | 14 |
-  // t9 = | 37 | 36 | 35 | 34 |
-
-      "addu            $s0, %[dst], %[dst_stride]        \n"
-      "addu            $s1, $s0, %[dst_stride]           \n"
-      "addu            $s2, $s1, %[dst_stride]           \n"
-
-      "swr              $s4, 0(%[dst])                   \n"
-      "swl              $s4, 3(%[dst])                   \n"
-      "swr              $t0, 4(%[dst])                   \n"
-      "swl              $t0, 7(%[dst])                   \n"
-      "swr              $s6, 0($s0)                      \n"
-      "swl              $s6, 3($s0)                      \n"
-      "swr              $t8, 4($s0)                      \n"
-      "swl              $t8, 7($s0)                      \n"
-      "swr              $s5, 0($s1)                      \n"
-      "swl              $s5, 3($s1)                      \n"
-      "swr              $t1, 4($s1)                      \n"
-      "swl              $t1, 7($s1)                      \n"
-      "swr              $s7, 0($s2)                      \n"
-      "swl              $s7, 3($s2)                      \n"
-      "swr              $t9, 4($s2)                      \n"
-      "swl              $t9, 7($s2)                      \n"
-
-      "addiu            $AT, -1                          \n"
-      "addiu            %[src], 4                        \n"
-
-      "bnez             $AT, 11b                         \n"
-      " addu            %[dst], $s2, %[dst_stride]       \n"
-      "2:                                                \n"
-      ".set pop                                          \n"
-      ".set at                                           \n"
-      :[src] "+r" (src),
-       [dst] "+r" (dst),
-       [width] "+r" (width)
-      :[src_stride] "r" (src_stride),
-       [dst_stride] "r" (dst_stride)
-      : "t0", "t1",  "t2", "t3",  "t4", "t5",
-        "t6", "t7", "t8", "t9",
-        "s0", "s1", "s2", "s3", "s4",
-        "s5", "s6", "s7"
-  );
-}
-
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
-                               uint8* dst_a, int dst_stride_a,
-                               uint8* dst_b, int dst_stride_b,
-                               int width) {
-  __asm__ __volatile__ (
-      ".set push                                         \n"
-      ".set noreorder                                    \n"
-      "beqz            %[width], 2f                      \n"
-      " sll            $t2, %[src_stride], 0x1           \n" // src_stride x 2
-      "sll             $t4, %[src_stride], 0x2           \n" // src_stride x 4
-      "sll             $t9, %[src_stride], 0x3           \n" // src_stride x 8
-      "addu            $t3, $t2, %[src_stride]           \n"
-      "addu            $t5, $t4, %[src_stride]           \n"
-      "addu            $t6, $t2, $t4                     \n"
-      "subu            $t7, $t9, %[src_stride]           \n"
-      "srl             $t1, %[width], 1                  \n"
-
-// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
-      "andi            $t0, %[dst_a], 0x3                \n"
-      "andi            $t8, %[dst_b], 0x3                \n"
-      "or              $t0, $t0, $t8                     \n"
-      "andi            $t8, %[dst_stride_a], 0x3         \n"
-      "andi            $s5, %[dst_stride_b], 0x3         \n"
-      "or              $t8, $t8, $s5                     \n"
-      "or              $t0, $t0, $t8                     \n"
-      "bnez            $t0, 11f                          \n"
-      " nop                                              \n"
-// dst + dst_stride word aligned (both, a & b dst addresses)
-    "1:                                                  \n"
-      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
-      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
-      "addu            $s5, %[dst_a], %[dst_stride_a]    \n"
-      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
-      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
-      "addu            $s6, %[dst_b], %[dst_stride_b]    \n"
-
-      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
-      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
-
-      "sll             $t0, $t0, 16                      \n"
-      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
-      "sll             $t9, $t9, 16                      \n"
-      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
-
-      "sw              $s3, 0($s5)                       \n"
-      "sw              $s4, 0($s6)                       \n"
-
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
-
-      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
-      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
-      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
-      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
-      "sw              $s3, 0(%[dst_a])                  \n"
-      "sw              $s4, 0(%[dst_b])                  \n"
-
-      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
-      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B6|A6|B7|A7|
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
-
-      "sll             $t0, $t0, 16                      \n"
-      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
-      "sll             $t9, $t9, 16                      \n"
-      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
-      "sw              $s3, 4($s5)                       \n"
-      "sw              $s4, 4($s6)                       \n"
-
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
-
-      "addiu           %[src], 4                         \n"
-      "addiu           $t1, -1                           \n"
-      "sll             $t0, %[dst_stride_a], 1           \n"
-      "sll             $t8, %[dst_stride_b], 1           \n"
-      "sw              $s3, 4(%[dst_a])                  \n"
-      "sw              $s4, 4(%[dst_b])                  \n"
-      "addu            %[dst_a], %[dst_a], $t0           \n"
-      "bnez            $t1, 1b                           \n"
-      " addu           %[dst_b], %[dst_b], $t8           \n"
-      "b               2f                                \n"
-      " nop                                              \n"
-
-// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
-   "11:                                                  \n"
-      "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
-      "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
-      "addu            $s5, %[dst_a], %[dst_stride_a]    \n"
-      "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
-      "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
-      "addu            $s6, %[dst_b], %[dst_stride_b]    \n"
-
-      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
-      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
-
-      "sll             $t0, $t0, 16                      \n"
-      "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
-      "sll             $t9, $t9, 16                      \n"
-      "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
-
-      "swr             $s3, 0($s5)                       \n"
-      "swl             $s3, 3($s5)                       \n"
-      "swr             $s4, 0($s6)                       \n"
-      "swl             $s4, 3($s6)                       \n"
-
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
-
-      "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
-      "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
-      "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
-      "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
-      "swr             $s3, 0(%[dst_a])                  \n"
-      "swl             $s3, 3(%[dst_a])                  \n"
-      "swr             $s4, 0(%[dst_b])                  \n"
-      "swl             $s4, 3(%[dst_b])                  \n"
-
-      "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
-      "precrq.ph.w     $s2, $s0, $t9                     \n" // |B6|A6|B7|A7|
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
-
-      "sll             $t0, $t0, 16                      \n"
-      "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
-      "sll             $t9, $t9, 16                      \n"
-      "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
-
-      "swr             $s3, 4($s5)                       \n"
-      "swl             $s3, 7($s5)                       \n"
-      "swr             $s4, 4($s6)                       \n"
-      "swl             $s4, 7($s6)                       \n"
-
-      "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
-      "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
-
-      "addiu           %[src], 4                         \n"
-      "addiu           $t1, -1                           \n"
-      "sll             $t0, %[dst_stride_a], 1           \n"
-      "sll             $t8, %[dst_stride_b], 1           \n"
-      "swr             $s3, 4(%[dst_a])                  \n"
-      "swl             $s3, 7(%[dst_a])                  \n"
-      "swr             $s4, 4(%[dst_b])                  \n"
-      "swl             $s4, 7(%[dst_b])                  \n"
-      "addu            %[dst_a], %[dst_a], $t0           \n"
-      "bnez            $t1, 11b                          \n"
-      " addu           %[dst_b], %[dst_b], $t8           \n"
-
-      "2:                                                \n"
-      ".set pop                                          \n"
-      : [src] "+r" (src),
-        [dst_a] "+r" (dst_a),
-        [dst_b] "+r" (dst_b),
-        [width] "+r" (width),
-        [src_stride] "+r" (src_stride)
-      : [dst_stride_a] "r" (dst_stride_a),
-        [dst_stride_b] "r" (dst_stride_b)
-      : "t0", "t1",  "t2", "t3",  "t4", "t5",
-        "t6", "t7", "t8", "t9",
-        "s0", "s1", "s2", "s3",
-        "s4", "s5", "s6"
-  );
-}
-
-#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc
deleted file mode 100755
index 274c4109cd..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-static uvec8 kVTbl4x4Transpose =
-  { 0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15 };
-
-void TransposeWx8_NEON(const uint8* src, int src_stride,
-                       uint8* dst, int dst_stride,
-                       int width) {
-  const uint8* src_temp = NULL;
-  asm volatile (
-    // loops are on blocks of 8. loop will stop when
-    // counter gets to or below 0. starting the counter
-    // at w-8 allow for this
-#ifdef _ANDROID
-				".fpu neon\n"
-#endif
-    "sub         %5, #8                        \n"
-
-    // handle 8x8 blocks. this should be the majority of the plane
-    ".p2align  2                               \n"
-    "1:                                        \n"
-      "mov         %0, %1                      \n"
-
-      "vld1.8      {d0}, [%0], %2              \n"
-      "vld1.8      {d1}, [%0], %2              \n"
-      "vld1.8      {d2}, [%0], %2              \n"
-      "vld1.8      {d3}, [%0], %2              \n"
-      "vld1.8      {d4}, [%0], %2              \n"
-      "vld1.8      {d5}, [%0], %2              \n"
-      "vld1.8      {d6}, [%0], %2              \n"
-      "vld1.8      {d7}, [%0]                  \n"
-
-      "vtrn.8      d1, d0                      \n"
-      "vtrn.8      d3, d2                      \n"
-      "vtrn.8      d5, d4                      \n"
-      "vtrn.8      d7, d6                      \n"
-
-      "vtrn.16     d1, d3                      \n"
-      "vtrn.16     d0, d2                      \n"
-      "vtrn.16     d5, d7                      \n"
-      "vtrn.16     d4, d6                      \n"
-
-      "vtrn.32     d1, d5                      \n"
-      "vtrn.32     d0, d4                      \n"
-      "vtrn.32     d3, d7                      \n"
-      "vtrn.32     d2, d6                      \n"
-
-      "vrev16.8    q0, q0                      \n"
-      "vrev16.8    q1, q1                      \n"
-      "vrev16.8    q2, q2                      \n"
-      "vrev16.8    q3, q3                      \n"
-
-      "mov         %0, %3                      \n"
-
-      "vst1.8      {d1}, [%0], %4              \n"
-      "vst1.8      {d0}, [%0], %4              \n"
-      "vst1.8      {d3}, [%0], %4              \n"
-      "vst1.8      {d2}, [%0], %4              \n"
-      "vst1.8      {d5}, [%0], %4              \n"
-      "vst1.8      {d4}, [%0], %4              \n"
-      "vst1.8      {d7}, [%0], %4              \n"
-      "vst1.8      {d6}, [%0]                  \n"
-
-      "add         %1, #8                      \n"  // src += 8
-      "add         %3, %3, %4, lsl #3          \n"  // dst += 8 * dst_stride
-      "subs        %5,  #8                     \n"  // w   -= 8
-      "bge         1b                          \n"
-
-    // add 8 back to counter. if the result is 0 there are
-    // no residuals.
-    "adds        %5, #8                        \n"
-    "beq         4f                            \n"
-
-    // some residual, so between 1 and 7 lines left to transpose
-    "cmp         %5, #2                        \n"
-    "blt         3f                            \n"
-
-    "cmp         %5, #4                        \n"
-    "blt         2f                            \n"
-
-    // 4x8 block
-    "mov         %0, %1                        \n"
-    "vld1.32     {d0[0]}, [%0], %2             \n"
-    "vld1.32     {d0[1]}, [%0], %2             \n"
-    "vld1.32     {d1[0]}, [%0], %2             \n"
-    "vld1.32     {d1[1]}, [%0], %2             \n"
-    "vld1.32     {d2[0]}, [%0], %2             \n"
-    "vld1.32     {d2[1]}, [%0], %2             \n"
-    "vld1.32     {d3[0]}, [%0], %2             \n"
-    "vld1.32     {d3[1]}, [%0]                 \n"
-
-    "mov         %0, %3                        \n"
-
-    "vld1.8      {q3}, [%6]                    \n"
-
-    "vtbl.8      d4, {d0, d1}, d6              \n"
-    "vtbl.8      d5, {d0, d1}, d7              \n"
-    "vtbl.8      d0, {d2, d3}, d6              \n"
-    "vtbl.8      d1, {d2, d3}, d7              \n"
-
-    // TODO(frkoenig): Rework shuffle above to
-    // write out with 4 instead of 8 writes.
-    "vst1.32     {d4[0]}, [%0], %4             \n"
-    "vst1.32     {d4[1]}, [%0], %4             \n"
-    "vst1.32     {d5[0]}, [%0], %4             \n"
-    "vst1.32     {d5[1]}, [%0]                 \n"
-
-    "add         %0, %3, #4                    \n"
-    "vst1.32     {d0[0]}, [%0], %4             \n"
-    "vst1.32     {d0[1]}, [%0], %4             \n"
-    "vst1.32     {d1[0]}, [%0], %4             \n"
-    "vst1.32     {d1[1]}, [%0]                 \n"
-
-    "add         %1, #4                        \n"  // src += 4
-    "add         %3, %3, %4, lsl #2            \n"  // dst += 4 * dst_stride
-    "subs        %5,  #4                       \n"  // w   -= 4
-    "beq         4f                            \n"
-
-    // some residual, check to see if it includes a 2x8 block,
-    // or less
-    "cmp         %5, #2                        \n"
-    "blt         3f                            \n"
-
-    // 2x8 block
-    "2:                                        \n"
-    "mov         %0, %1                        \n"
-    "vld1.16     {d0[0]}, [%0], %2             \n"
-    "vld1.16     {d1[0]}, [%0], %2             \n"
-    "vld1.16     {d0[1]}, [%0], %2             \n"
-    "vld1.16     {d1[1]}, [%0], %2             \n"
-    "vld1.16     {d0[2]}, [%0], %2             \n"
-    "vld1.16     {d1[2]}, [%0], %2             \n"
-    "vld1.16     {d0[3]}, [%0], %2             \n"
-    "vld1.16     {d1[3]}, [%0]                 \n"
-
-    "vtrn.8      d0, d1                        \n"
-
-    "mov         %0, %3                        \n"
-
-    "vst1.64     {d0}, [%0], %4                \n"
-    "vst1.64     {d1}, [%0]                    \n"
-
-    "add         %1, #2                        \n"  // src += 2
-    "add         %3, %3, %4, lsl #1            \n"  // dst += 2 * dst_stride
-    "subs        %5,  #2                       \n"  // w   -= 2
-    "beq         4f                            \n"
-
-    // 1x8 block
-    "3:                                        \n"
-    "vld1.8      {d0[0]}, [%1], %2             \n"
-    "vld1.8      {d0[1]}, [%1], %2             \n"
-    "vld1.8      {d0[2]}, [%1], %2             \n"
-    "vld1.8      {d0[3]}, [%1], %2             \n"
-    "vld1.8      {d0[4]}, [%1], %2             \n"
-    "vld1.8      {d0[5]}, [%1], %2             \n"
-    "vld1.8      {d0[6]}, [%1], %2             \n"
-    "vld1.8      {d0[7]}, [%1]                 \n"
-
-    "vst1.64     {d0}, [%3]                    \n"
-
-    "4:                                        \n"
-
-    : "+r"(src_temp),          // %0
-      "+r"(src),               // %1
-      "+r"(src_stride),        // %2
-      "+r"(dst),               // %3
-      "+r"(dst_stride),        // %4
-      "+r"(width)              // %5
-    : "r"(&kVTbl4x4Transpose)  // %6
-    : "memory", "cc", "q0", "q1", "q2", "q3"
-  );
-}
-
-static uvec8 kVTbl4x4TransposeDi =
-  { 0,  8,  1,  9,  2, 10,  3, 11,  4, 12,  5, 13,  6, 14,  7, 15 };
-
-void TransposeUVWx8_NEON(const uint8* src, int src_stride,
-                         uint8* dst_a, int dst_stride_a,
-                         uint8* dst_b, int dst_stride_b,
-                         int width) {
-  const uint8* src_temp = NULL;
-  asm volatile (
-    // loops are on blocks of 8. loop will stop when
-    // counter gets to or below 0. starting the counter
-    // at w-8 allow for this
-    "sub         %7, #8                        \n"
-
-    // handle 8x8 blocks. this should be the majority of the plane
-    ".p2align  2                               \n"
-    "1:                                        \n"
-      "mov         %0, %1                      \n"
-
-      "vld2.8      {d0,  d1},  [%0], %2        \n"
-      "vld2.8      {d2,  d3},  [%0], %2        \n"
-      "vld2.8      {d4,  d5},  [%0], %2        \n"
-      "vld2.8      {d6,  d7},  [%0], %2        \n"
-      "vld2.8      {d16, d17}, [%0], %2        \n"
-      "vld2.8      {d18, d19}, [%0], %2        \n"
-      "vld2.8      {d20, d21}, [%0], %2        \n"
-      "vld2.8      {d22, d23}, [%0]            \n"
-
-      "vtrn.8      q1, q0                      \n"
-      "vtrn.8      q3, q2                      \n"
-      "vtrn.8      q9, q8                      \n"
-      "vtrn.8      q11, q10                    \n"
-
-      "vtrn.16     q1, q3                      \n"
-      "vtrn.16     q0, q2                      \n"
-      "vtrn.16     q9, q11                     \n"
-      "vtrn.16     q8, q10                     \n"
-
-      "vtrn.32     q1, q9                      \n"
-      "vtrn.32     q0, q8                      \n"
-      "vtrn.32     q3, q11                     \n"
-      "vtrn.32     q2, q10                     \n"
-
-      "vrev16.8    q0, q0                      \n"
-      "vrev16.8    q1, q1                      \n"
-      "vrev16.8    q2, q2                      \n"
-      "vrev16.8    q3, q3                      \n"
-      "vrev16.8    q8, q8                      \n"
-      "vrev16.8    q9, q9                      \n"
-      "vrev16.8    q10, q10                    \n"
-      "vrev16.8    q11, q11                    \n"
-
-      "mov         %0, %3                      \n"
-
-      "vst1.8      {d2},  [%0], %4             \n"
-      "vst1.8      {d0},  [%0], %4             \n"
-      "vst1.8      {d6},  [%0], %4             \n"
-      "vst1.8      {d4},  [%0], %4             \n"
-      "vst1.8      {d18}, [%0], %4             \n"
-      "vst1.8      {d16}, [%0], %4             \n"
-      "vst1.8      {d22}, [%0], %4             \n"
-      "vst1.8      {d20}, [%0]                 \n"
-
-      "mov         %0, %5                      \n"
-
-      "vst1.8      {d3},  [%0], %6             \n"
-      "vst1.8      {d1},  [%0], %6             \n"
-      "vst1.8      {d7},  [%0], %6             \n"
-      "vst1.8      {d5},  [%0], %6             \n"
-      "vst1.8      {d19}, [%0], %6             \n"
-      "vst1.8      {d17}, [%0], %6             \n"
-      "vst1.8      {d23}, [%0], %6             \n"
-      "vst1.8      {d21}, [%0]                 \n"
-
-      "add         %1, #8*2                    \n"  // src   += 8*2
-      "add         %3, %3, %4, lsl #3          \n"  // dst_a += 8 * dst_stride_a
-      "add         %5, %5, %6, lsl #3          \n"  // dst_b += 8 * dst_stride_b
-      "subs        %7,  #8                     \n"  // w     -= 8
-      "bge         1b                          \n"
-
-    // add 8 back to counter. if the result is 0 there are
-    // no residuals.
-    "adds        %7, #8                        \n"
-    "beq         4f                            \n"
-
-    // some residual, so between 1 and 7 lines left to transpose
-    "cmp         %7, #2                        \n"
-    "blt         3f                            \n"
-
-    "cmp         %7, #4                        \n"
-    "blt         2f                            \n"
-
-    //TODO(frkoenig): Clean this up
-    // 4x8 block
-    "mov         %0, %1                        \n"
-    "vld1.64     {d0}, [%0], %2                \n"
-    "vld1.64     {d1}, [%0], %2                \n"
-    "vld1.64     {d2}, [%0], %2                \n"
-    "vld1.64     {d3}, [%0], %2                \n"
-    "vld1.64     {d4}, [%0], %2                \n"
-    "vld1.64     {d5}, [%0], %2                \n"
-    "vld1.64     {d6}, [%0], %2                \n"
-    "vld1.64     {d7}, [%0]                    \n"
-
-    "vld1.8      {q15}, [%8]                   \n"
-
-    "vtrn.8      q0, q1                        \n"
-    "vtrn.8      q2, q3                        \n"
-
-    "vtbl.8      d16, {d0, d1}, d30            \n"
-    "vtbl.8      d17, {d0, d1}, d31            \n"
-    "vtbl.8      d18, {d2, d3}, d30            \n"
-    "vtbl.8      d19, {d2, d3}, d31            \n"
-    "vtbl.8      d20, {d4, d5}, d30            \n"
-    "vtbl.8      d21, {d4, d5}, d31            \n"
-    "vtbl.8      d22, {d6, d7}, d30            \n"
-    "vtbl.8      d23, {d6, d7}, d31            \n"
-
-    "mov         %0, %3                        \n"
-
-    "vst1.32     {d16[0]},  [%0], %4           \n"
-    "vst1.32     {d16[1]},  [%0], %4           \n"
-    "vst1.32     {d17[0]},  [%0], %4           \n"
-    "vst1.32     {d17[1]},  [%0], %4           \n"
-
-    "add         %0, %3, #4                    \n"
-    "vst1.32     {d20[0]}, [%0], %4            \n"
-    "vst1.32     {d20[1]}, [%0], %4            \n"
-    "vst1.32     {d21[0]}, [%0], %4            \n"
-    "vst1.32     {d21[1]}, [%0]                \n"
-
-    "mov         %0, %5                        \n"
-
-    "vst1.32     {d18[0]}, [%0], %6            \n"
-    "vst1.32     {d18[1]}, [%0], %6            \n"
-    "vst1.32     {d19[0]}, [%0], %6            \n"
-    "vst1.32     {d19[1]}, [%0], %6            \n"
-
-    "add         %0, %5, #4                    \n"
-    "vst1.32     {d22[0]},  [%0], %6           \n"
-    "vst1.32     {d22[1]},  [%0], %6           \n"
-    "vst1.32     {d23[0]},  [%0], %6           \n"
-    "vst1.32     {d23[1]},  [%0]               \n"
-
-    "add         %1, #4*2                      \n"  // src   += 4 * 2
-    "add         %3, %3, %4, lsl #2            \n"  // dst_a += 4 * dst_stride_a
-    "add         %5, %5, %6, lsl #2            \n"  // dst_b += 4 * dst_stride_b
-    "subs        %7,  #4                       \n"  // w     -= 4
-    "beq         4f                            \n"
-
-    // some residual, check to see if it includes a 2x8 block,
-    // or less
-    "cmp         %7, #2                        \n"
-    "blt         3f                            \n"
-
-    // 2x8 block
-    "2:                                        \n"
-    "mov         %0, %1                        \n"
-    "vld2.16     {d0[0], d2[0]}, [%0], %2      \n"
-    "vld2.16     {d1[0], d3[0]}, [%0], %2      \n"
-    "vld2.16     {d0[1], d2[1]}, [%0], %2      \n"
-    "vld2.16     {d1[1], d3[1]}, [%0], %2      \n"
-    "vld2.16     {d0[2], d2[2]}, [%0], %2      \n"
-    "vld2.16     {d1[2], d3[2]}, [%0], %2      \n"
-    "vld2.16     {d0[3], d2[3]}, [%0], %2      \n"
-    "vld2.16     {d1[3], d3[3]}, [%0]          \n"
-
-    "vtrn.8      d0, d1                        \n"
-    "vtrn.8      d2, d3                        \n"
-
-    "mov         %0, %3                        \n"
-
-    "vst1.64     {d0}, [%0], %4                \n"
-    "vst1.64     {d2}, [%0]                    \n"
-
-    "mov         %0, %5                        \n"
-
-    "vst1.64     {d1}, [%0], %6                \n"
-    "vst1.64     {d3}, [%0]                    \n"
-
-    "add         %1, #2*2                      \n"  // src   += 2 * 2
-    "add         %3, %3, %4, lsl #1            \n"  // dst_a += 2 * dst_stride_a
-    "add         %5, %5, %6, lsl #1            \n"  // dst_b += 2 * dst_stride_b
-    "subs        %7,  #2                       \n"  // w     -= 2
-    "beq         4f                            \n"
-
-    // 1x8 block
-    "3:                                        \n"
-    "vld2.8      {d0[0], d1[0]}, [%1], %2      \n"
-    "vld2.8      {d0[1], d1[1]}, [%1], %2      \n"
-    "vld2.8      {d0[2], d1[2]}, [%1], %2      \n"
-    "vld2.8      {d0[3], d1[3]}, [%1], %2      \n"
-    "vld2.8      {d0[4], d1[4]}, [%1], %2      \n"
-    "vld2.8      {d0[5], d1[5]}, [%1], %2      \n"
-    "vld2.8      {d0[6], d1[6]}, [%1], %2      \n"
-    "vld2.8      {d0[7], d1[7]}, [%1]          \n"
-
-    "vst1.64     {d0}, [%3]                    \n"
-    "vst1.64     {d1}, [%5]                    \n"
-
-    "4:                                        \n"
-
-    : "+r"(src_temp),            // %0
-      "+r"(src),                 // %1
-      "+r"(src_stride),          // %2
-      "+r"(dst_a),               // %3
-      "+r"(dst_stride_a),        // %4
-      "+r"(dst_b),               // %5
-      "+r"(dst_stride_b),        // %6
-      "+r"(width)                // %7
-    : "r"(&kVTbl4x4TransposeDi)  // %8
-    : "memory", "cc",
-      "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
-  );
-}
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc
deleted file mode 100755
index 90c6a3ff5f..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc
+++ /dev/null
@@ -1,542 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
-// TODO(fbarchard): Consider 'any' functions handling odd alignment.
-// YUV to RGB does multiple of 8 with SIMD and remainder with C.
-#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK)        \
-    void NAMEANY(const uint8* y_buf,                                           \
-                 const uint8* u_buf,                                           \
-                 const uint8* v_buf,                                           \
-                 uint8* rgb_buf,                                               \
-                 int width) {                                                  \
-      int n = width & ~MASK;                                                   \
-      I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n);                         \
-      I420TORGB_C(y_buf + n,                                                   \
-                  u_buf + (n >> UV_SHIFT),                                     \
-                  v_buf + (n >> UV_SHIFT),                                     \
-                  rgb_buf + n * BPP, width & MASK);                            \
-    }
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
-     0, 4, 7)
-YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
-     1, 4, 7)
-YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
-     2, 4, 7)
-YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
-     1, 4, 7)
-YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
-     1, 4, 7)
-YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
-     1, 4, 7)
-// I422ToRGB565Row_SSSE3 is unaligned.
-YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C,
-     1, 2, 7)
-YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C,
-     1, 2, 7)
-YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C,
-     1, 2, 7)
-// I422ToRGB24Row_SSSE3 is unaligned.
-YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
-YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
-YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
-YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
-#endif  // HAS_I422TOARGBROW_SSSE3
-#ifdef HAS_I422TOARGBROW_AVX2
-YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
-#endif  // HAS_I422TOARGBROW_AVX2
-#ifdef HAS_I422TOARGBROW_NEON
-YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
-YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
-YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
-YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
-YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
-YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
-YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
-YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
-YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C,
-     1, 2, 7)
-YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
-     1, 2, 7)
-YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
-YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
-YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
-#endif  // HAS_I422TOARGBROW_NEON
-#undef YANY
-
-// Wrappers to handle odd width
-#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP)             \
-    void NAMEANY(const uint8* y_buf,                                           \
-                 const uint8* uv_buf,                                          \
-                 uint8* rgb_buf,                                               \
-                 int width) {                                                  \
-      int n = width & ~7;                                                      \
-      NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n);                               \
-      NV12TORGB_C(y_buf + n,                                                   \
-                  uv_buf + (n >> UV_SHIFT),                                    \
-                  rgb_buf + n * BPP, width & 7);                               \
-    }
-
-#ifdef HAS_NV12TOARGBROW_SSSE3
-NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
-      0, 4)
-NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
-      0, 4)
-#endif  // HAS_NV12TOARGBROW_SSSE3
-#ifdef HAS_NV12TOARGBROW_NEON
-NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
-NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
-#endif  // HAS_NV12TOARGBROW_NEON
-#ifdef HAS_NV12TORGB565ROW_SSSE3
-NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
-      0, 2)
-NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
-      0, 2)
-#endif  // HAS_NV12TORGB565ROW_SSSE3
-#ifdef HAS_NV12TORGB565ROW_NEON
-NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2)
-NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
-#endif  // HAS_NV12TORGB565ROW_NEON
-#undef NVANY
-
-#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)          \
-    void NAMEANY(const uint8* src,                                             \
-                 uint8* dst,                                                   \
-                 int width) {                                                  \
-      int n = width & ~MASK;                                                   \
-      ARGBTORGB_SIMD(src, dst, n);                                             \
-      ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK);                \
-    }
-
-#if defined(HAS_ARGBTORGB24ROW_SSSE3)
-RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C,
-       15, 4, 3)
-RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C,
-       15, 4, 3)
-RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C,
-       3, 4, 2)
-RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C,
-       3, 4, 2)
-RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
-       3, 4, 2)
-#endif
-#if defined(HAS_I400TOARGBROW_SSE2)
-RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
-       7, 1, 4)
-#endif
-#if defined(HAS_YTOARGBROW_SSE2)
-RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C,
-       7, 1, 4)
-RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
-       15, 2, 4)
-RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
-       15, 2, 4)
-// These require alignment on ARGB, so C is used for remainder.
-RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
-       15, 3, 4)
-RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C,
-       15, 3, 4)
-RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C,
-       7, 2, 4)
-RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C,
-       7, 2, 4)
-RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C,
-       7, 2, 4)
-#endif
-#if defined(HAS_ARGBTORGB24ROW_NEON)
-RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
-RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
-RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C,
-       7, 4, 2)
-RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C,
-       7, 4, 2)
-RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
-       7, 4, 2)
-RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C,
-       7, 1, 4)
-RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C,
-       7, 1, 4)
-RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C,
-       7, 2, 4)
-RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
-       7, 2, 4)
-#endif
-#undef RGBANY
-
-// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
-#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)        \
-    void NAMEANY(const uint8* src,                                             \
-                 uint8* dst, uint32 selector,                                  \
-                 int width) {                                                  \
-      int n = width & ~MASK;                                                   \
-      ARGBTORGB_SIMD(src, dst, selector, n);                                   \
-      ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK);      \
-    }
-
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
-BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
-         7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERROW_NEON)
-BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
-         7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
-BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
-         7, 4, 1)
-#endif
-#if defined(HAS_ARGBTOBAYERGGROW_NEON)
-BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
-         7, 4, 1)
-#endif
-
-#undef BAYERANY
-
-// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
-#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM)                            \
-    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
-      ARGBTOY_SIMD(src_argb, dst_y, width - NUM);                              \
-      ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP,                            \
-                   dst_y + (width - NUM) * BPP, NUM);                          \
-    }
-
-#ifdef HAS_ARGBTOYROW_AVX2
-YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
-YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32)
-YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
-YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
-#endif
-#ifdef HAS_ARGBTOYROW_SSSE3
-YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
-#endif
-#ifdef HAS_BGRATOYROW_SSSE3
-YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
-YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
-YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
-#endif
-#ifdef HAS_ARGBTOYJROW_SSSE3
-YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16)
-#endif
-#ifdef HAS_ARGBTOYROW_NEON
-YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
-YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 4, 1, 8)
-YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8)
-YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8)
-YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8)
-YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8)
-YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
-YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
-YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
-YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
-YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
-YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
-YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
-YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
-YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
-YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
-YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
-#endif
-#undef YANY
-
-#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK)                \
-    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
-      int n = width & ~MASK;                                                   \
-      ARGBTOY_SIMD(src_argb, dst_y, n);                                        \
-      ARGBTOY_C(src_argb + n * SBPP,                                           \
-                dst_y  + n * BPP, width & MASK);                               \
-    }
-
-// Attenuate is destructive so last16 method can not be used due to overlap.
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
-     4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C,
-     4, 4, 3)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C,
-     4, 4, 3)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C,
-     4, 4, 7)
-#endif
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C,
-     4, 4, 7)
-#endif
-#ifdef HAS_ARGBATTENUATEROW_NEON
-YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
-     4, 4, 7)
-#endif
-#undef YANY
-
-// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
-#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK)                     \
-    void NAMEANY(const uint8* src_argb, int src_stride_argb,                   \
-                 uint8* dst_u, uint8* dst_v, int width) {                      \
-      int n = width & ~MASK;                                                   \
-      ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n);                \
-      ANYTOUV_C(src_argb  + n * BPP, src_stride_argb,                          \
-                dst_u + (n >> 1),                                              \
-                dst_v + (n >> 1),                                              \
-                width & MASK);                                                 \
-    }
-
-#ifdef HAS_ARGBTOUVROW_AVX2
-UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
-UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
-UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
-#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
-UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
-UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
-      4, 15)
-UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
-UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
-UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
-UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2, 15)
-UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
-#endif
-#ifdef HAS_ARGBTOUVROW_NEON
-UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
-UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
-UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
-UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
-UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
-UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15)
-UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
-UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
-UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
-UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
-UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
-UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
-#endif
-#undef UVANY
-
-#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT)           \
-    void NAMEANY(const uint8* src_uv,                                          \
-                 uint8* dst_u, uint8* dst_v, int width) {                      \
-      int n = width & ~MASK;                                                   \
-      ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                   \
-      ANYTOUV_C(src_uv  + n * BPP,                                             \
-                dst_u + (n >> SHIFT),                                          \
-                dst_v + (n >> SHIFT),                                          \
-                width & MASK);                                                 \
-    }
-
-#ifdef HAS_ARGBTOUV444ROW_SSSE3
-UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
-         ARGBToUV444Row_C, 4, 15, 0)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_AVX2
-UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2,
-         YUY2ToUV422Row_C, 2, 31, 1)
-UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
-         UYVYToUV422Row_C, 2, 31, 1)
-#endif
-#ifdef HAS_ARGBTOUVROW_SSSE3
-UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
-         ARGBToUV422Row_C, 4, 15, 1)
-UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
-         YUY2ToUV422Row_C, 2, 15, 1)
-UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
-         UYVYToUV422Row_C, 2, 15, 1)
-#endif
-#ifdef HAS_YUY2TOUV422ROW_NEON
-UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
-         ARGBToUV444Row_C, 4, 7, 0)
-UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
-         ARGBToUV422Row_C, 4, 15, 1)
-UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
-         ARGBToUV411Row_C, 4, 31, 2)
-UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
-         YUY2ToUV422Row_C, 2, 15, 1)
-UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
-         UYVYToUV422Row_C, 2, 15, 1)
-#endif
-#undef UV422ANY
-
-#define SPLITUVROWANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                  \
-    void NAMEANY(const uint8* src_uv,                                          \
-                 uint8* dst_u, uint8* dst_v, int width) {                      \
-      int n = width & ~MASK;                                                   \
-      ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                   \
-      ANYTOUV_C(src_uv + n * 2,                                                \
-                dst_u + n,                                                     \
-                dst_v + n,                                                     \
-                width & MASK);                                                 \
-    }
-
-#ifdef HAS_SPLITUVROW_SSE2
-SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
-#endif
-#ifdef HAS_SPLITUVROW_AVX2
-SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
-#endif
-#ifdef HAS_SPLITUVROW_NEON
-SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
-#endif
-#ifdef HAS_SPLITUVROW_MIPS_DSPR2
-SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
-              SplitUVRow_C, 15)
-#endif
-#undef SPLITUVROWANY
-
-#define MERGEUVROW_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                 \
-    void NAMEANY(const uint8* src_u, const uint8* src_v,                       \
-                 uint8* dst_uv, int width) {                                   \
-      int n = width & ~MASK;                                                   \
-      ANYTOUV_SIMD(src_u, src_v, dst_uv, n);                                   \
-      ANYTOUV_C(src_u + n,                                                     \
-                src_v + n,                                                     \
-                dst_uv + n * 2,                                                \
-                width & MASK);                                                 \
-    }
-
-#ifdef HAS_MERGEUVROW_SSE2
-MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
-#endif
-#ifdef HAS_MERGEUVROW_AVX2
-MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31)
-#endif
-#ifdef HAS_MERGEUVROW_NEON
-MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
-#endif
-#undef MERGEUVROW_ANY
-
-#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK)                  \
-    void NAMEANY(const uint8* src_argb0, const uint8* src_argb1,               \
-                 uint8* dst_argb, int width) {                                 \
-      int n = width & ~MASK;                                                   \
-      ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n);                        \
-      ARGBMATH_C(src_argb0 + n * 4,                                            \
-                 src_argb1 + n * 4,                                            \
-                 dst_argb + n * 4,                                             \
-                 width & MASK);                                                \
-    }
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
-            3)
-#endif
-#ifdef HAS_ARGBADDROW_SSE2
-MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
-            3)
-#endif
-#ifdef HAS_ARGBMULTIPLYROW_AVX2
-MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
-            7)
-#endif
-#ifdef HAS_ARGBADDROW_AVX2
-MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_AVX2
-MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
-            7)
-#endif
-#ifdef HAS_ARGBMULTIPLYROW_NEON
-MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
-            7)
-#endif
-#ifdef HAS_ARGBADDROW_NEON
-MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7)
-#endif
-#ifdef HAS_ARGBSUBTRACTROW_NEON
-MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
-            7)
-#endif
-#undef MATHROW_ANY
-
-// Shuffle may want to work in place, so last16 method can not be used.
-#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK)                \
-    void NAMEANY(const uint8* src_argb, uint8* dst_argb,                       \
-                 const uint8* shuffler, int width) {                           \
-      int n = width & ~MASK;                                                   \
-      ARGBTOY_SIMD(src_argb, dst_argb, shuffler, n);                           \
-      ARGBTOY_C(src_argb + n * SBPP,                                           \
-                dst_argb  + n * BPP, shuffler, width & MASK);                  \
-    }
-
-#ifdef HAS_ARGBSHUFFLEROW_SSE2
-YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2,
-     ARGBShuffleRow_C, 4, 4, 3)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3,
-     ARGBShuffleRow_C, 4, 4, 7)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2,
-     ARGBShuffleRow_C, 4, 4, 15)
-#endif
-#ifdef HAS_ARGBSHUFFLEROW_NEON
-YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON,
-     ARGBShuffleRow_C, 4, 4, 3)
-#endif
-#undef YANY
-
-// Interpolate may want to work in place, so last16 method can not be used.
-#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK)                      \
-    void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                         \
-                 ptrdiff_t src_stride_ptr, int width,                          \
-                 int source_y_fraction) {                                      \
-      int n = width & ~MASK;                                                   \
-      TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr,                              \
-                n, source_y_fraction);                                         \
-      TERP_C(dst_ptr + n * BPP,                                                \
-             src_ptr + n * SBPP, src_stride_ptr,                               \
-             width & MASK, source_y_fraction);                                 \
-    }
-
-#ifdef HAS_INTERPOLATEROW_AVX2
-NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2,
-     InterpolateRow_C, 1, 1, 32)
-#endif
-#ifdef HAS_INTERPOLATEROW_SSSE3
-NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
-     InterpolateRow_C, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_SSE2
-NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
-     InterpolateRow_C, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_NEON
-NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON,
-     InterpolateRow_C, 1, 1, 15)
-#endif
-#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
-NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2,
-     InterpolateRow_C, 1, 1, 3)
-#endif
-#undef NANY
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc
deleted file mode 100755
index 135bdc9084..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc
+++ /dev/null
@@ -1,2247 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#include <string.h>  // For memcpy and memset.
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// llvm x86 is poor at ternary operator, so use branchless min/max.
-
-#define USE_BRANCHLESS 1
-#if USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
-  return ((-(v) >> 31) & (v));
-}
-
-static __inline int32 clamp255(int32 v) {
-  return (((255 - (v)) >> 31) | (v)) & 255;
-}
-
-static __inline uint32 Clamp(int32 val) {
-  int v = clamp0(val);
-  return (uint32)(clamp255(v));
-}
-
-static __inline uint32 Abs(int32 v) {
-  int m = v >> 31;
-  return (v + m) ^ m;
-}
-#else  // USE_BRANCHLESS
-static __inline int32 clamp0(int32 v) {
-  return (v < 0) ? 0 : v;
-}
-
-static __inline int32 clamp255(int32 v) {
-  return (v > 255) ? 255 : v;
-}
-
-static __inline uint32 Clamp(int32 val) {
-  int v = clamp0(val);
-  return (uint32)(clamp255(v));
-}
-
-static __inline uint32 Abs(int32 v) {
-  return (v < 0) ? -v : v;
-}
-#endif  // USE_BRANCHLESS
-
-#ifdef LIBYUV_LITTLE_ENDIAN
-#define WRITEWORD(p, v) *(uint32*)(p) = v
-#else
-static inline void WRITEWORD(uint8* p, uint32 v) {
-  p[0] = (uint8)(v & 255);
-  p[1] = (uint8)((v >> 8) & 255);
-  p[2] = (uint8)((v >> 16) & 255);
-  p[3] = (uint8)((v >> 24) & 255);
-}
-#endif
-
-void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_rgb24[0];
-    uint8 g = src_rgb24[1];
-    uint8 r = src_rgb24[2];
-    dst_argb[0] = b;
-    dst_argb[1] = g;
-    dst_argb[2] = r;
-    dst_argb[3] = 255u;
-    dst_argb += 4;
-    src_rgb24 += 3;
-  }
-}
-
-void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 r = src_raw[0];
-    uint8 g = src_raw[1];
-    uint8 b = src_raw[2];
-    dst_argb[0] = b;
-    dst_argb[1] = g;
-    dst_argb[2] = r;
-    dst_argb[3] = 255u;
-    dst_argb += 4;
-    src_raw += 3;
-  }
-}
-
-void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_rgb565[0] & 0x1f;
-    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8 r = src_rgb565[1] >> 3;
-    dst_argb[0] = (b << 3) | (b >> 2);
-    dst_argb[1] = (g << 2) | (g >> 4);
-    dst_argb[2] = (r << 3) | (r >> 2);
-    dst_argb[3] = 255u;
-    dst_argb += 4;
-    src_rgb565 += 2;
-  }
-}
-
-void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
-                         int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_argb1555[0] & 0x1f;
-    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
-    uint8 a = src_argb1555[1] >> 7;
-    dst_argb[0] = (b << 3) | (b >> 2);
-    dst_argb[1] = (g << 3) | (g >> 2);
-    dst_argb[2] = (r << 3) | (r >> 2);
-    dst_argb[3] = -a;
-    dst_argb += 4;
-    src_argb1555 += 2;
-  }
-}
-
-void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
-                         int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_argb4444[0] & 0x0f;
-    uint8 g = src_argb4444[0] >> 4;
-    uint8 r = src_argb4444[1] & 0x0f;
-    uint8 a = src_argb4444[1] >> 4;
-    dst_argb[0] = (b << 4) | b;
-    dst_argb[1] = (g << 4) | g;
-    dst_argb[2] = (r << 4) | r;
-    dst_argb[3] = (a << 4) | a;
-    dst_argb += 4;
-    src_argb4444 += 2;
-  }
-}
-
-void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_argb[0];
-    uint8 g = src_argb[1];
-    uint8 r = src_argb[2];
-    dst_rgb[0] = b;
-    dst_rgb[1] = g;
-    dst_rgb[2] = r;
-    dst_rgb += 3;
-    src_argb += 4;
-  }
-}
-
-void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_argb[0];
-    uint8 g = src_argb[1];
-    uint8 r = src_argb[2];
-    dst_rgb[0] = r;
-    dst_rgb[1] = g;
-    dst_rgb[2] = b;
-    dst_rgb += 3;
-    src_argb += 4;
-  }
-}
-
-void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb[0] >> 3;
-    uint8 g0 = src_argb[1] >> 2;
-    uint8 r0 = src_argb[2] >> 3;
-    uint8 b1 = src_argb[4] >> 3;
-    uint8 g1 = src_argb[5] >> 2;
-    uint8 r1 = src_argb[6] >> 3;
-    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
-              (b1 << 16) | (g1 << 21) | (r1 << 27));
-    dst_rgb += 4;
-    src_argb += 8;
-  }
-  if (width & 1) {
-    uint8 b0 = src_argb[0] >> 3;
-    uint8 g0 = src_argb[1] >> 2;
-    uint8 r0 = src_argb[2] >> 3;
-    *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
-  }
-}
-
-void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb[0] >> 3;
-    uint8 g0 = src_argb[1] >> 3;
-    uint8 r0 = src_argb[2] >> 3;
-    uint8 a0 = src_argb[3] >> 7;
-    uint8 b1 = src_argb[4] >> 3;
-    uint8 g1 = src_argb[5] >> 3;
-    uint8 r1 = src_argb[6] >> 3;
-    uint8 a1 = src_argb[7] >> 7;
-    *(uint32*)(dst_rgb) =
-        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
-        (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
-    dst_rgb += 4;
-    src_argb += 8;
-  }
-  if (width & 1) {
-    uint8 b0 = src_argb[0] >> 3;
-    uint8 g0 = src_argb[1] >> 3;
-    uint8 r0 = src_argb[2] >> 3;
-    uint8 a0 = src_argb[3] >> 7;
-    *(uint16*)(dst_rgb) =
-        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
-  }
-}
-
-void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb[0] >> 4;
-    uint8 g0 = src_argb[1] >> 4;
-    uint8 r0 = src_argb[2] >> 4;
-    uint8 a0 = src_argb[3] >> 4;
-    uint8 b1 = src_argb[4] >> 4;
-    uint8 g1 = src_argb[5] >> 4;
-    uint8 r1 = src_argb[6] >> 4;
-    uint8 a1 = src_argb[7] >> 4;
-    *(uint32*)(dst_rgb) =
-        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
-        (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
-    dst_rgb += 4;
-    src_argb += 8;
-  }
-  if (width & 1) {
-    uint8 b0 = src_argb[0] >> 4;
-    uint8 g0 = src_argb[1] >> 4;
-    uint8 r0 = src_argb[2] >> 4;
-    uint8 a0 = src_argb[3] >> 4;
-    *(uint16*)(dst_rgb) =
-        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
-  }
-}
-
-static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
-  return (66 * r + 129 * g +  25 * b + 0x1080) >> 8;
-}
-
-static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
-  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
-}
-static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
-  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
-}
-
-#define MAKEROWY(NAME, R, G, B, BPP) \
-void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
-  int x;                                                                       \
-  for (x = 0; x < width; ++x) {                                                \
-    dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
-    src_argb0 += BPP;                                                          \
-    dst_y += 1;                                                                \
-  }                                                                            \
-}                                                                              \
-void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
-                       uint8* dst_u, uint8* dst_v, int width) {                \
-  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
-  int x;                                                                       \
-  for (x = 0; x < width - 1; x += 2) {                                         \
-    uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
-               src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
-    uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
-               src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
-    uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
-               src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
-    dst_u[0] = RGBToU(ar, ag, ab);                                             \
-    dst_v[0] = RGBToV(ar, ag, ab);                                             \
-    src_rgb0 += BPP * 2;                                                       \
-    src_rgb1 += BPP * 2;                                                       \
-    dst_u += 1;                                                                \
-    dst_v += 1;                                                                \
-  }                                                                            \
-  if (width & 1) {                                                             \
-    uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
-    uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
-    uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
-    dst_u[0] = RGBToU(ar, ag, ab);                                             \
-    dst_v[0] = RGBToV(ar, ag, ab);                                             \
-  }                                                                            \
-}
-
-MAKEROWY(ARGB, 2, 1, 0, 4)
-MAKEROWY(BGRA, 1, 2, 3, 4)
-MAKEROWY(ABGR, 0, 1, 2, 4)
-MAKEROWY(RGBA, 3, 2, 1, 4)
-MAKEROWY(RGB24, 2, 1, 0, 3)
-MAKEROWY(RAW, 0, 1, 2, 3)
-#undef MAKEROWY
-
-// JPeg uses a variation on BT.601-1 full range
-// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
-// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
-// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
-// BT.601 Mpeg range uses:
-// b 0.1016 * 255 = 25.908 = 25
-// g 0.5078 * 255 = 129.489 = 129
-// r 0.2578 * 255 = 65.739 = 66
-// JPeg 8 bit Y (not used):
-// b 0.11400 * 256 = 29.184 = 29
-// g 0.58700 * 256 = 150.272 = 150
-// r 0.29900 * 256 = 76.544 = 77
-// JPeg 7 bit Y:
-// b 0.11400 * 128 = 14.592 = 15
-// g 0.58700 * 128 = 75.136 = 75
-// r 0.29900 * 128 = 38.272 = 38
-// JPeg 8 bit U:
-// b  0.50000 * 255 = 127.5 = 127
-// g -0.33126 * 255 = -84.4713 = -84
-// r -0.16874 * 255 = -43.0287 = -43
-// JPeg 8 bit V:
-// b -0.08131 * 255 = -20.73405 = -20
-// g -0.41869 * 255 = -106.76595 = -107
-// r  0.50000 * 255 = 127.5 = 127
-
-static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
-  return (38 * r + 75 * g +  15 * b + 64) >> 7;
-}
-
-static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
-  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
-}
-static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
-  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
-}
-
-#define AVGB(a, b) (((a) + (b) + 1) >> 1)
-
-#define MAKEROWYJ(NAME, R, G, B, BPP) \
-void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
-  int x;                                                                       \
-  for (x = 0; x < width; ++x) {                                                \
-    dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
-    src_argb0 += BPP;                                                          \
-    dst_y += 1;                                                                \
-  }                                                                            \
-}                                                                              \
-void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
-                        uint8* dst_u, uint8* dst_v, int width) {               \
-  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
-  int x;                                                                       \
-  for (x = 0; x < width - 1; x += 2) {                                         \
-    uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
-                    AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
-    uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
-                    AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
-    uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
-                    AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
-    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
-    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
-    src_rgb0 += BPP * 2;                                                       \
-    src_rgb1 += BPP * 2;                                                       \
-    dst_u += 1;                                                                \
-    dst_v += 1;                                                                \
-  }                                                                            \
-  if (width & 1) {                                                             \
-    uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
-    uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
-    uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
-    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
-    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
-  }                                                                            \
-}
-
-MAKEROWYJ(ARGB, 2, 1, 0, 4)
-#undef MAKEROWYJ
-
-void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_rgb565[0] & 0x1f;
-    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8 r = src_rgb565[1] >> 3;
-    b = (b << 3) | (b >> 2);
-    g = (g << 2) | (g >> 4);
-    r = (r << 3) | (r >> 2);
-    dst_y[0] = RGBToY(r, g, b);
-    src_rgb565 += 2;
-    dst_y += 1;
-  }
-}
-
-void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_argb1555[0] & 0x1f;
-    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
-    b = (b << 3) | (b >> 2);
-    g = (g << 3) | (g >> 2);
-    r = (r << 3) | (r >> 2);
-    dst_y[0] = RGBToY(r, g, b);
-    src_argb1555 += 2;
-    dst_y += 1;
-  }
-}
-
-void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 b = src_argb4444[0] & 0x0f;
-    uint8 g = src_argb4444[0] >> 4;
-    uint8 r = src_argb4444[1] & 0x0f;
-    b = (b << 4) | b;
-    g = (g << 4) | g;
-    r = (r << 4) | r;
-    dst_y[0] = RGBToY(r, g, b);
-    src_argb4444 += 2;
-    dst_y += 1;
-  }
-}
-
-void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
-                     uint8* dst_u, uint8* dst_v, int width) {
-  const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_rgb565[0] & 0x1f;
-    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8 r0 = src_rgb565[1] >> 3;
-    uint8 b1 = src_rgb565[2] & 0x1f;
-    uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
-    uint8 r1 = src_rgb565[3] >> 3;
-    uint8 b2 = next_rgb565[0] & 0x1f;
-    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
-    uint8 r2 = next_rgb565[1] >> 3;
-    uint8 b3 = next_rgb565[2] & 0x1f;
-    uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
-    uint8 r3 = next_rgb565[3] >> 3;
-    uint8 b = (b0 + b1 + b2 + b3);  // 565 * 4 = 787.
-    uint8 g = (g0 + g1 + g2 + g3);
-    uint8 r = (r0 + r1 + r2 + r3);
-    b = (b << 1) | (b >> 6);  // 787 -> 888.
-    r = (r << 1) | (r >> 6);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-    src_rgb565 += 4;
-    next_rgb565 += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-  if (width & 1) {
-    uint8 b0 = src_rgb565[0] & 0x1f;
-    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
-    uint8 r0 = src_rgb565[1] >> 3;
-    uint8 b2 = next_rgb565[0] & 0x1f;
-    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
-    uint8 r2 = next_rgb565[1] >> 3;
-    uint8 b = (b0 + b2);  // 565 * 2 = 676.
-    uint8 g = (g0 + g2);
-    uint8 r = (r0 + r2);
-    b = (b << 2) | (b >> 4);  // 676 -> 888
-    g = (g << 1) | (g >> 6);
-    r = (r << 2) | (r >> 4);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-  }
-}
-
-void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb1555[0] & 0x1f;
-    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
-    uint8 b1 = src_argb1555[2] & 0x1f;
-    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
-    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
-    uint8 b2 = next_argb1555[0] & 0x1f;
-    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
-    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
-    uint8 b3 = next_argb1555[2] & 0x1f;
-    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
-    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
-    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
-    uint8 g = (g0 + g1 + g2 + g3);
-    uint8 r = (r0 + r1 + r2 + r3);
-    b = (b << 1) | (b >> 6);  // 777 -> 888.
-    g = (g << 1) | (g >> 6);
-    r = (r << 1) | (r >> 6);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-    src_argb1555 += 4;
-    next_argb1555 += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-  if (width & 1) {
-    uint8 b0 = src_argb1555[0] & 0x1f;
-    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
-    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
-    uint8 b2 = next_argb1555[0] & 0x1f;
-    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
-    uint8 r2 = next_argb1555[1] >> 3;
-    uint8 b = (b0 + b2);  // 555 * 2 = 666.
-    uint8 g = (g0 + g2);
-    uint8 r = (r0 + r2);
-    b = (b << 2) | (b >> 4);  // 666 -> 888.
-    g = (g << 2) | (g >> 4);
-    r = (r << 2) | (r >> 4);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-  }
-}
-
-void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 b0 = src_argb4444[0] & 0x0f;
-    uint8 g0 = src_argb4444[0] >> 4;
-    uint8 r0 = src_argb4444[1] & 0x0f;
-    uint8 b1 = src_argb4444[2] & 0x0f;
-    uint8 g1 = src_argb4444[2] >> 4;
-    uint8 r1 = src_argb4444[3] & 0x0f;
-    uint8 b2 = next_argb4444[0] & 0x0f;
-    uint8 g2 = next_argb4444[0] >> 4;
-    uint8 r2 = next_argb4444[1] & 0x0f;
-    uint8 b3 = next_argb4444[2] & 0x0f;
-    uint8 g3 = next_argb4444[2] >> 4;
-    uint8 r3 = next_argb4444[3] & 0x0f;
-    uint8 b = (b0 + b1 + b2 + b3);  // 444 * 4 = 666.
-    uint8 g = (g0 + g1 + g2 + g3);
-    uint8 r = (r0 + r1 + r2 + r3);
-    b = (b << 2) | (b >> 4);  // 666 -> 888.
-    g = (g << 2) | (g >> 4);
-    r = (r << 2) | (r >> 4);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-    src_argb4444 += 4;
-    next_argb4444 += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-  if (width & 1) {
-    uint8 b0 = src_argb4444[0] & 0x0f;
-    uint8 g0 = src_argb4444[0] >> 4;
-    uint8 r0 = src_argb4444[1] & 0x0f;
-    uint8 b2 = next_argb4444[0] & 0x0f;
-    uint8 g2 = next_argb4444[0] >> 4;
-    uint8 r2 = next_argb4444[1] & 0x0f;
-    uint8 b = (b0 + b2);  // 444 * 2 = 555.
-    uint8 g = (g0 + g2);
-    uint8 r = (r0 + r2);
-    b = (b << 3) | (b >> 2);  // 555 -> 888.
-    g = (g << 3) | (g >> 2);
-    r = (r << 3) | (r >> 2);
-    dst_u[0] = RGBToU(r, g, b);
-    dst_v[0] = RGBToV(r, g, b);
-  }
-}
-
-void ARGBToUV444Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 ab = src_argb[0];
-    uint8 ag = src_argb[1];
-    uint8 ar = src_argb[2];
-    dst_u[0] = RGBToU(ar, ag, ab);
-    dst_v[0] = RGBToV(ar, ag, ab);
-    src_argb += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-}
-
-void ARGBToUV422Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
-    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
-    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
-    dst_u[0] = RGBToU(ar, ag, ab);
-    dst_v[0] = RGBToV(ar, ag, ab);
-    src_argb += 8;
-    dst_u += 1;
-    dst_v += 1;
-  }
-  if (width & 1) {
-    uint8 ab = src_argb[0];
-    uint8 ag = src_argb[1];
-    uint8 ar = src_argb[2];
-    dst_u[0] = RGBToU(ar, ag, ab);
-    dst_v[0] = RGBToV(ar, ag, ab);
-  }
-}
-
-void ARGBToUV411Row_C(const uint8* src_argb,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  int x;
-  for (x = 0; x < width - 3; x += 4) {
-    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
-    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
-    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
-    dst_u[0] = RGBToU(ar, ag, ab);
-    dst_v[0] = RGBToV(ar, ag, ab);
-    src_argb += 16;
-    dst_u += 1;
-    dst_v += 1;
-  }
-  if ((width & 3) == 3) {
-    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
-    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
-    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
-    dst_u[0] = RGBToU(ar, ag, ab);
-    dst_v[0] = RGBToV(ar, ag, ab);
-  } else if ((width & 3) == 2) {
-    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
-    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
-    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
-    dst_u[0] = RGBToU(ar, ag, ab);
-    dst_v[0] = RGBToV(ar, ag, ab);
-  } else if ((width & 3) == 1) {
-    uint8 ab = src_argb[0];
-    uint8 ag = src_argb[1];
-    uint8 ar = src_argb[2];
-    dst_u[0] = RGBToU(ar, ag, ab);
-    dst_v[0] = RGBToV(ar, ag, ab);
-  }
-}
-
-void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
-    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
-    dst_argb[3] = src_argb[3];
-    dst_argb += 4;
-    src_argb += 4;
-  }
-}
-
-// Convert a row of image to Sepia tone.
-void ARGBSepiaRow_C(uint8* dst_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    int b = dst_argb[0];
-    int g = dst_argb[1];
-    int r = dst_argb[2];
-    int sb = (b * 17 + g * 68 + r * 35) >> 7;
-    int sg = (b * 22 + g * 88 + r * 45) >> 7;
-    int sr = (b * 24 + g * 98 + r * 50) >> 7;
-    // b does not over flow. a is preserved from original.
-    dst_argb[0] = sb;
-    dst_argb[1] = clamp255(sg);
-    dst_argb[2] = clamp255(sr);
-    dst_argb += 4;
-  }
-}
-
-// Apply color matrix to a row of image. Matrix is signed.
-// TODO(fbarchard): Consider adding rounding (+32).
-void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
-                          const int8* matrix_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    int b = src_argb[0];
-    int g = src_argb[1];
-    int r = src_argb[2];
-    int a = src_argb[3];
-    int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
-              r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
-    int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
-              r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
-    int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
-              r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
-    int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
-              r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
-    dst_argb[0] = Clamp(sb);
-    dst_argb[1] = Clamp(sg);
-    dst_argb[2] = Clamp(sr);
-    dst_argb[3] = Clamp(sa);
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-
-// Apply color table to a row of image.
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    int b = dst_argb[0];
-    int g = dst_argb[1];
-    int r = dst_argb[2];
-    int a = dst_argb[3];
-    dst_argb[0] = table_argb[b * 4 + 0];
-    dst_argb[1] = table_argb[g * 4 + 1];
-    dst_argb[2] = table_argb[r * 4 + 2];
-    dst_argb[3] = table_argb[a * 4 + 3];
-    dst_argb += 4;
-  }
-}
-
-// Apply color table to a row of image.
-void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    int b = dst_argb[0];
-    int g = dst_argb[1];
-    int r = dst_argb[2];
-    dst_argb[0] = table_argb[b * 4 + 0];
-    dst_argb[1] = table_argb[g * 4 + 1];
-    dst_argb[2] = table_argb[r * 4 + 2];
-    dst_argb += 4;
-  }
-}
-
-void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
-                       int interval_offset, int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    int b = dst_argb[0];
-    int g = dst_argb[1];
-    int r = dst_argb[2];
-    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
-    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
-    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
-    dst_argb += 4;
-  }
-}
-
-#define REPEAT8(v) (v) | ((v) << 8)
-#define SHADE(f, v) v * f >> 24
-
-void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
-                    uint32 value) {
-  const uint32 b_scale = REPEAT8(value & 0xff);
-  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
-  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
-  const uint32 a_scale = REPEAT8(value >> 24);
-
-  int i;
-  for (i = 0; i < width; ++i) {
-    const uint32 b = REPEAT8(src_argb[0]);
-    const uint32 g = REPEAT8(src_argb[1]);
-    const uint32 r = REPEAT8(src_argb[2]);
-    const uint32 a = REPEAT8(src_argb[3]);
-    dst_argb[0] = SHADE(b, b_scale);
-    dst_argb[1] = SHADE(g, g_scale);
-    dst_argb[2] = SHADE(r, r_scale);
-    dst_argb[3] = SHADE(a, a_scale);
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-#undef REPEAT8
-#undef SHADE
-
-#define REPEAT8(v) (v) | ((v) << 8)
-#define SHADE(f, v) v * f >> 16
-
-void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                       uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    const uint32 b = REPEAT8(src_argb0[0]);
-    const uint32 g = REPEAT8(src_argb0[1]);
-    const uint32 r = REPEAT8(src_argb0[2]);
-    const uint32 a = REPEAT8(src_argb0[3]);
-    const uint32 b_scale = src_argb1[0];
-    const uint32 g_scale = src_argb1[1];
-    const uint32 r_scale = src_argb1[2];
-    const uint32 a_scale = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_scale);
-    dst_argb[1] = SHADE(g, g_scale);
-    dst_argb[2] = SHADE(r, r_scale);
-    dst_argb[3] = SHADE(a, a_scale);
-    src_argb0 += 4;
-    src_argb1 += 4;
-    dst_argb += 4;
-  }
-}
-#undef REPEAT8
-#undef SHADE
-
-#define SHADE(f, v) clamp255(v + f)
-
-void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                  uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    const int b = src_argb0[0];
-    const int g = src_argb0[1];
-    const int r = src_argb0[2];
-    const int a = src_argb0[3];
-    const int b_add = src_argb1[0];
-    const int g_add = src_argb1[1];
-    const int r_add = src_argb1[2];
-    const int a_add = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_add);
-    dst_argb[1] = SHADE(g, g_add);
-    dst_argb[2] = SHADE(r, r_add);
-    dst_argb[3] = SHADE(a, a_add);
-    src_argb0 += 4;
-    src_argb1 += 4;
-    dst_argb += 4;
-  }
-}
-#undef SHADE
-
-#define SHADE(f, v) clamp0(f - v)
-
-void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                       uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    const int b = src_argb0[0];
-    const int g = src_argb0[1];
-    const int r = src_argb0[2];
-    const int a = src_argb0[3];
-    const int b_sub = src_argb1[0];
-    const int g_sub = src_argb1[1];
-    const int r_sub = src_argb1[2];
-    const int a_sub = src_argb1[3];
-    dst_argb[0] = SHADE(b, b_sub);
-    dst_argb[1] = SHADE(g, g_sub);
-    dst_argb[2] = SHADE(r, r_sub);
-    dst_argb[3] = SHADE(a, a_sub);
-    src_argb0 += 4;
-    src_argb1 += 4;
-    dst_argb += 4;
-  }
-}
-#undef SHADE
-
-// Sobel functions which mimics SSSE3.
-void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
-                 uint8* dst_sobelx, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int a = src_y0[i];
-    int b = src_y1[i];
-    int c = src_y2[i];
-    int a_sub = src_y0[i + 2];
-    int b_sub = src_y1[i + 2];
-    int c_sub = src_y2[i + 2];
-    int a_diff = a - a_sub;
-    int b_diff = b - b_sub;
-    int c_diff = c - c_sub;
-    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
-    dst_sobelx[i] = (uint8)(clamp255(sobel));
-  }
-}
-
-void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
-                 uint8* dst_sobely, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int a = src_y0[i + 0];
-    int b = src_y0[i + 1];
-    int c = src_y0[i + 2];
-    int a_sub = src_y1[i + 0];
-    int b_sub = src_y1[i + 1];
-    int c_sub = src_y1[i + 2];
-    int a_diff = a - a_sub;
-    int b_diff = b - b_sub;
-    int c_diff = c - c_sub;
-    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
-    dst_sobely[i] = (uint8)(clamp255(sobel));
-  }
-}
-
-void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int r = src_sobelx[i];
-    int b = src_sobely[i];
-    int s = clamp255(r + b);
-    dst_argb[0] = (uint8)(s);
-    dst_argb[1] = (uint8)(s);
-    dst_argb[2] = (uint8)(s);
-    dst_argb[3] = (uint8)(255u);
-    dst_argb += 4;
-  }
-}
-
-void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                       uint8* dst_y, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int r = src_sobelx[i];
-    int b = src_sobely[i];
-    int s = clamp255(r + b);
-    dst_y[i] = (uint8)(s);
-  }
-}
-
-void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
-                  uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    int r = src_sobelx[i];
-    int b = src_sobely[i];
-    int g = clamp255(r + b);
-    dst_argb[0] = (uint8)(b);
-    dst_argb[1] = (uint8)(g);
-    dst_argb[2] = (uint8)(r);
-    dst_argb[3] = (uint8)(255u);
-    dst_argb += 4;
-  }
-}
-
-void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
-  // Copy a Y to RGB.
-  int x;
-  for (x = 0; x < width; ++x) {
-    uint8 y = src_y[0];
-    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
-    dst_argb[3] = 255u;
-    dst_argb += 4;
-    ++src_y;
-  }
-}
-
-// C reference code that mimics the YUV assembly.
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
-                              uint8* b, uint8* g, uint8* r) {
-  int32 y1 = ((int32)(y) - 16) * YG;
-  *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
-  *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
-  *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
-}
-
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-// C mimic assembly.
-// TODO(fbarchard): Remove subsampling from Neon.
-void I444ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
-    uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
-    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    src_u += 2;
-    src_v += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-  }
-}
-#else
-void I444ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width; ++x) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    src_y += 1;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 4;  // Advance 1 pixel.
-  }
-}
-#endif
-// Also used for 420
-void I422ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-  }
-}
-
-void I422ToRGB24Row_C(const uint8* src_y,
-                      const uint8* src_u,
-                      const uint8* src_v,
-                      uint8* rgb_buf,
-                      int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 6;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-  }
-}
-
-void I422ToRAWRow_C(const uint8* src_y,
-                    const uint8* src_u,
-                    const uint8* src_v,
-                    uint8* rgb_buf,
-                    int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 6;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
-  }
-}
-
-void I422ToARGB4444Row_C(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb4444,
-                         int width) {
-  uint8 b0;
-  uint8 g0;
-  uint8 r0;
-  uint8 b1;
-  uint8 g1;
-  uint8 r1;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
-    b0 = b0 >> 4;
-    g0 = g0 >> 4;
-    r0 = r0 >> 4;
-    b1 = b1 >> 4;
-    g1 = g1 >> 4;
-    r1 = r1 >> 4;
-    *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
-        (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    dst_argb4444 += 4;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    b0 = b0 >> 4;
-    g0 = g0 >> 4;
-    r0 = r0 >> 4;
-    *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
-        0xf000;
-  }
-}
-
-void I422ToARGB1555Row_C(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_argb1555,
-                         int width) {
-  uint8 b0;
-  uint8 g0;
-  uint8 r0;
-  uint8 b1;
-  uint8 g1;
-  uint8 r1;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
-    b0 = b0 >> 3;
-    g0 = g0 >> 3;
-    r0 = r0 >> 3;
-    b1 = b1 >> 3;
-    g1 = g1 >> 3;
-    r1 = r1 >> 3;
-    *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
-        (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    dst_argb1555 += 4;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    b0 = b0 >> 3;
-    g0 = g0 >> 3;
-    r0 = r0 >> 3;
-    *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
-        0x8000;
-  }
-}
-
-void I422ToRGB565Row_C(const uint8* src_y,
-                       const uint8* src_u,
-                       const uint8* src_v,
-                       uint8* dst_rgb565,
-                       int width) {
-  uint8 b0;
-  uint8 g0;
-  uint8 r0;
-  uint8 b1;
-  uint8 g1;
-  uint8 r1;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    b1 = b1 >> 3;
-    g1 = g1 >> 2;
-    r1 = r1 >> 3;
-    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
-        (b1 << 16) | (g1 << 21) | (r1 << 27);
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    dst_rgb565 += 4;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
-  }
-}
-
-void I411ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 3; x += 4) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    YuvPixel(src_y[2], src_u[0], src_v[0],
-             rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
-    rgb_buf[11] = 255;
-    YuvPixel(src_y[3], src_u[0], src_v[0],
-             rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
-    rgb_buf[15] = 255;
-    src_y += 4;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 16;  // Advance 4 pixels.
-  }
-  if (width & 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-  }
-}
-
-void NV12ToARGBRow_C(const uint8* src_y,
-                     const uint8* usrc_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    usrc_v += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-  }
-}
-
-void NV21ToARGBRow_C(const uint8* src_y,
-                     const uint8* src_vu,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_vu[1], src_vu[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-
-    YuvPixel(src_y[1], src_vu[1], src_vu[0],
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-
-    src_y += 2;
-    src_vu += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_vu[1], src_vu[0],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-  }
-}
-
-void NV12ToRGB565Row_C(const uint8* src_y,
-                       const uint8* usrc_v,
-                       uint8* dst_rgb565,
-                       int width) {
-  uint8 b0;
-  uint8 g0;
-  uint8 r0;
-  uint8 b1;
-  uint8 g1;
-  uint8 r1;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
-    YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    b1 = b1 >> 3;
-    g1 = g1 >> 2;
-    r1 = r1 >> 3;
-    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
-        (b1 << 16) | (g1 << 21) | (r1 << 27);
-    src_y += 2;
-    usrc_v += 2;
-    dst_rgb565 += 4;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
-  }
-}
-
-void NV21ToRGB565Row_C(const uint8* src_y,
-                       const uint8* vsrc_u,
-                       uint8* dst_rgb565,
-                       int width) {
-  uint8 b0;
-  uint8 g0;
-  uint8 r0;
-  uint8 b1;
-  uint8 g1;
-  uint8 r1;
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
-    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    b1 = b1 >> 3;
-    g1 = g1 >> 2;
-    r1 = r1 >> 3;
-    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
-        (b1 << 16) | (g1 << 21) | (r1 << 27);
-    src_y += 2;
-    vsrc_u += 2;
-    dst_rgb565 += 4;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
-    b0 = b0 >> 3;
-    g0 = g0 >> 2;
-    r0 = r0 >> 3;
-    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
-  }
-}
-
-void YUY2ToARGBRow_C(const uint8* src_yuy2,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_yuy2 += 4;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-  }
-}
-
-void UYVYToARGBRow_C(const uint8* src_uyvy,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_uyvy += 4;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-  }
-}
-
-void I422ToBGRARow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
-    rgb_buf[0] = 255;
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
-    rgb_buf[4] = 255;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
-    rgb_buf[0] = 255;
-  }
-}
-
-void I422ToABGRRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
-    rgb_buf[3] = 255;
-  }
-}
-
-void I422ToRGBARow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* rgb_buf,
-                     int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
-    rgb_buf[0] = 255;
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
-    rgb_buf[4] = 255;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
-    rgb_buf[0] = 255;
-  }
-}
-
-void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], 128, 128,
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-    YuvPixel(src_y[1], 128, 128,
-             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
-    rgb_buf[7] = 255;
-    src_y += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], 128, 128,
-             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
-    rgb_buf[3] = 255;
-  }
-}
-
-void MirrorRow_C(const uint8* src, uint8* dst, int width) {
-  int x;
-  src += width - 1;
-  for (x = 0; x < width - 1; x += 2) {
-    dst[x] = src[0];
-    dst[x + 1] = src[-1];
-    src -= 2;
-  }
-  if (width & 1) {
-    dst[width - 1] = src[0];
-  }
-}
-
-void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
-  int x;
-  src_uv += (width - 1) << 1;
-  for (x = 0; x < width - 1; x += 2) {
-    dst_u[x] = src_uv[0];
-    dst_u[x + 1] = src_uv[-2];
-    dst_v[x] = src_uv[1];
-    dst_v[x + 1] = src_uv[-2 + 1];
-    src_uv -= 4;
-  }
-  if (width & 1) {
-    dst_u[width - 1] = src_uv[0];
-    dst_v[width - 1] = src_uv[1];
-  }
-}
-
-void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
-  int x;
-  const uint32* src32 = (const uint32*)(src);
-  uint32* dst32 = (uint32*)(dst);
-  src32 += width - 1;
-  for (x = 0; x < width - 1; x += 2) {
-    dst32[x] = src32[0];
-    dst32[x + 1] = src32[-1];
-    src32 -= 2;
-  }
-  if (width & 1) {
-    dst32[width - 1] = src32[0];
-  }
-}
-
-void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    dst_u[x] = src_uv[0];
-    dst_u[x + 1] = src_uv[2];
-    dst_v[x] = src_uv[1];
-    dst_v[x + 1] = src_uv[3];
-    src_uv += 4;
-  }
-  if (width & 1) {
-    dst_u[width - 1] = src_uv[0];
-    dst_v[width - 1] = src_uv[1];
-  }
-}
-
-void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    dst_uv[0] = src_u[x];
-    dst_uv[1] = src_v[x];
-    dst_uv[2] = src_u[x + 1];
-    dst_uv[3] = src_v[x + 1];
-    dst_uv += 4;
-  }
-  if (width & 1) {
-    dst_uv[0] = src_u[width - 1];
-    dst_uv[1] = src_v[width - 1];
-  }
-}
-
-void CopyRow_C(const uint8* src, uint8* dst, int count) {
-  memcpy(dst, src, count);
-}
-
-void SetRow_C(uint8* dst, uint32 v8, int count) {
-#ifdef _MSC_VER
-  // VC will generate rep stosb.
-  int x;
-  for (x = 0; x < count; ++x) {
-    dst[x] = v8;
-  }
-#else
-  memset(dst, v8, count);
-#endif
-}
-
-void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
-                 int dst_stride, int height) {
-  int y;
-  for (y = 0; y < height; ++y) {
-    uint32* d = (uint32*)(dst);
-    int x;
-    for (x = 0; x < width; ++x) {
-      d[x] = v32;
-    }
-    dst += dst_stride;
-  }
-}
-
-// Filter 2 rows of YUY2 UV's (422) into U and V (420).
-void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
-                   uint8* dst_u, uint8* dst_v, int width) {
-  // Output a row of UV values, filtering 2 rows of YUY2.
-  int x;
-  for (x = 0; x < width; x += 2) {
-    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
-    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
-    src_yuy2 += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-}
-
-// Copy row of YUY2 UV's (422) into U and V (422).
-void YUY2ToUV422Row_C(const uint8* src_yuy2,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  // Output a row of UV values.
-  int x;
-  for (x = 0; x < width; x += 2) {
-    dst_u[0] = src_yuy2[1];
-    dst_v[0] = src_yuy2[3];
-    src_yuy2 += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-}
-
-// Copy row of YUY2 Y's (422) into Y (420/422).
-void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
-  // Output a row of Y values.
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    dst_y[x] = src_yuy2[0];
-    dst_y[x + 1] = src_yuy2[2];
-    src_yuy2 += 4;
-  }
-  if (width & 1) {
-    dst_y[width - 1] = src_yuy2[0];
-  }
-}
-
-// Filter 2 rows of UYVY UV's (422) into U and V (420).
-void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
-                   uint8* dst_u, uint8* dst_v, int width) {
-  // Output a row of UV values.
-  int x;
-  for (x = 0; x < width; x += 2) {
-    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
-    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
-    src_uyvy += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-}
-
-// Copy row of UYVY UV's (422) into U and V (422).
-void UYVYToUV422Row_C(const uint8* src_uyvy,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  // Output a row of UV values.
-  int x;
-  for (x = 0; x < width; x += 2) {
-    dst_u[0] = src_uyvy[0];
-    dst_v[0] = src_uyvy[2];
-    src_uyvy += 4;
-    dst_u += 1;
-    dst_v += 1;
-  }
-}
-
-// Copy row of UYVY Y's (422) into Y (420/422).
-void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
-  // Output a row of Y values.
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    dst_y[x] = src_uyvy[1];
-    dst_y[x + 1] = src_uyvy[3];
-    src_uyvy += 4;
-  }
-  if (width & 1) {
-    dst_y[width - 1] = src_uyvy[1];
-  }
-}
-
-#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
-
-// Blend src_argb0 over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb0 or src_argb1.
-// This code mimics the SSSE3 version for better testability.
-void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
-                    uint8* dst_argb, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    uint32 fb = src_argb0[0];
-    uint32 fg = src_argb0[1];
-    uint32 fr = src_argb0[2];
-    uint32 a = src_argb0[3];
-    uint32 bb = src_argb1[0];
-    uint32 bg = src_argb1[1];
-    uint32 br = src_argb1[2];
-    dst_argb[0] = BLEND(fb, bb, a);
-    dst_argb[1] = BLEND(fg, bg, a);
-    dst_argb[2] = BLEND(fr, br, a);
-    dst_argb[3] = 255u;
-
-    fb = src_argb0[4 + 0];
-    fg = src_argb0[4 + 1];
-    fr = src_argb0[4 + 2];
-    a = src_argb0[4 + 3];
-    bb = src_argb1[4 + 0];
-    bg = src_argb1[4 + 1];
-    br = src_argb1[4 + 2];
-    dst_argb[4 + 0] = BLEND(fb, bb, a);
-    dst_argb[4 + 1] = BLEND(fg, bg, a);
-    dst_argb[4 + 2] = BLEND(fr, br, a);
-    dst_argb[4 + 3] = 255u;
-    src_argb0 += 8;
-    src_argb1 += 8;
-    dst_argb += 8;
-  }
-
-  if (width & 1) {
-    uint32 fb = src_argb0[0];
-    uint32 fg = src_argb0[1];
-    uint32 fr = src_argb0[2];
-    uint32 a = src_argb0[3];
-    uint32 bb = src_argb1[0];
-    uint32 bg = src_argb1[1];
-    uint32 br = src_argb1[2];
-    dst_argb[0] = BLEND(fb, bb, a);
-    dst_argb[1] = BLEND(fg, bg, a);
-    dst_argb[2] = BLEND(fr, br, a);
-    dst_argb[3] = 255u;
-  }
-}
-#undef BLEND
-#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
-
-// Multiply source RGB by alpha and store to destination.
-// This code mimics the SSSE3 version for better testability.
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width - 1; i += 2) {
-    uint32 b = src_argb[0];
-    uint32 g = src_argb[1];
-    uint32 r = src_argb[2];
-    uint32 a = src_argb[3];
-    dst_argb[0] = ATTENUATE(b, a);
-    dst_argb[1] = ATTENUATE(g, a);
-    dst_argb[2] = ATTENUATE(r, a);
-    dst_argb[3] = a;
-    b = src_argb[4];
-    g = src_argb[5];
-    r = src_argb[6];
-    a = src_argb[7];
-    dst_argb[4] = ATTENUATE(b, a);
-    dst_argb[5] = ATTENUATE(g, a);
-    dst_argb[6] = ATTENUATE(r, a);
-    dst_argb[7] = a;
-    src_argb += 8;
-    dst_argb += 8;
-  }
-
-  if (width & 1) {
-    const uint32 b = src_argb[0];
-    const uint32 g = src_argb[1];
-    const uint32 r = src_argb[2];
-    const uint32 a = src_argb[3];
-    dst_argb[0] = ATTENUATE(b, a);
-    dst_argb[1] = ATTENUATE(g, a);
-    dst_argb[2] = ATTENUATE(r, a);
-    dst_argb[3] = a;
-  }
-}
-#undef ATTENUATE
-
-// Divide source RGB by alpha and store to destination.
-// b = (b * 255 + (a / 2)) / a;
-// g = (g * 255 + (a / 2)) / a;
-// r = (r * 255 + (a / 2)) / a;
-// Reciprocal method is off by 1 on some values. ie 125
-// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
-#define T(a) 0x01000000 + (0x10000 / a)
-const uint32 fixed_invtbl8[256] = {
-  0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
-  T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
-  T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
-  T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
-  T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
-  T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
-  T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
-  T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
-  T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
-  T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
-  T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
-  T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
-  T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
-  T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
-  T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
-  T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
-  T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
-  T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
-  T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
-  T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
-  T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
-  T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
-  T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
-  T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
-  T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
-  T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
-  T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
-  T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
-  T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
-  T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
-  T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
-  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
-#undef T
-
-void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    uint32 b = src_argb[0];
-    uint32 g = src_argb[1];
-    uint32 r = src_argb[2];
-    const uint32 a = src_argb[3];
-    const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
-    b = (b * ia) >> 8;
-    g = (g * ia) >> 8;
-    r = (r * ia) >> 8;
-    // Clamping should not be necessary but is free in assembly.
-    dst_argb[0] = clamp255(b);
-    dst_argb[1] = clamp255(g);
-    dst_argb[2] = clamp255(r);
-    dst_argb[3] = a;
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-
-void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
-                               const int32* previous_cumsum, int width) {
-  int32 row_sum[4] = {0, 0, 0, 0};
-  int x;
-  for (x = 0; x < width; ++x) {
-    row_sum[0] += row[x * 4 + 0];
-    row_sum[1] += row[x * 4 + 1];
-    row_sum[2] += row[x * 4 + 2];
-    row_sum[3] += row[x * 4 + 3];
-    cumsum[x * 4 + 0] = row_sum[0]  + previous_cumsum[x * 4 + 0];
-    cumsum[x * 4 + 1] = row_sum[1]  + previous_cumsum[x * 4 + 1];
-    cumsum[x * 4 + 2] = row_sum[2]  + previous_cumsum[x * 4 + 2];
-    cumsum[x * 4 + 3] = row_sum[3]  + previous_cumsum[x * 4 + 3];
-  }
-}
-
-void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
-                                int w, int area, uint8* dst, int count) {
-  float ooa = 1.0f / area;
-  int i;
-  for (i = 0; i < count; ++i) {
-    dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
-    dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
-    dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
-    dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
-    dst += 4;
-    tl += 4;
-    bl += 4;
-  }
-}
-
-// Copy pixels from rotated source to destination row with a slope.
-LIBYUV_API
-void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
-                     uint8* dst_argb, const float* uv_dudv, int width) {
-  int i;
-  // Render a row of pixels from source into a buffer.
-  float uv[2];
-  uv[0] = uv_dudv[0];
-  uv[1] = uv_dudv[1];
-  for (i = 0; i < width; ++i) {
-    int x = (int)(uv[0]);
-    int y = (int)(uv[1]);
-    *(uint32*)(dst_argb) =
-        *(const uint32*)(src_argb + y * src_argb_stride +
-                                         x * 4);
-    dst_argb += 4;
-    uv[0] += uv_dudv[2];
-    uv[1] += uv_dudv[3];
-  }
-}
-
-// Blend 2 rows into 1 for conversions such as I422ToI420.
-void HalfRow_C(const uint8* src_uv, int src_uv_stride,
-               uint8* dst_uv, int pix) {
-  int x;
-  for (x = 0; x < pix; ++x) {
-    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
-  }
-}
-
-// C version 2x2 -> 2x1.
-void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
-                      ptrdiff_t src_stride,
-                      int width, int source_y_fraction) {
-  int y1_fraction = source_y_fraction;
-  int y0_fraction = 256 - y1_fraction;
-  const uint8* src_ptr1 = src_ptr + src_stride;
-  int x;
-  if (source_y_fraction == 0) {
-    memcpy(dst_ptr, src_ptr, width);
-    return;
-  }
-  if (source_y_fraction == 128) {
-    HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
-    return;
-  }
-  for (x = 0; x < width - 1; x += 2) {
-    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
-    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
-    src_ptr += 2;
-    src_ptr1 += 2;
-    dst_ptr += 2;
-  }
-  if (width & 1) {
-    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
-  }
-}
-
-// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
-void ARGBToBayerRow_C(const uint8* src_argb,
-                      uint8* dst_bayer, uint32 selector, int pix) {
-  int index0 = selector & 0xff;
-  int index1 = (selector >> 8) & 0xff;
-  // Copy a row of Bayer.
-  int x;
-  for (x = 0; x < pix - 1; x += 2) {
-    dst_bayer[0] = src_argb[index0];
-    dst_bayer[1] = src_argb[index1];
-    src_argb += 8;
-    dst_bayer += 2;
-  }
-  if (pix & 1) {
-    dst_bayer[0] = src_argb[index0];
-  }
-}
-
-// Select G channel from ARGB.  e.g.  GGGGGGGG
-void ARGBToBayerGGRow_C(const uint8* src_argb,
-                        uint8* dst_bayer, uint32 selector, int pix) {
-  // Copy a row of G.
-  int x;
-  for (x = 0; x < pix - 1; x += 2) {
-    dst_bayer[0] = src_argb[1];
-    dst_bayer[1] = src_argb[5];
-    src_argb += 8;
-    dst_bayer += 2;
-  }
-  if (pix & 1) {
-    dst_bayer[0] = src_argb[1];
-  }
-}
-
-// Use first 4 shuffler values to reorder ARGB channels.
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
-                      const uint8* shuffler, int pix) {
-  int index0 = shuffler[0];
-  int index1 = shuffler[1];
-  int index2 = shuffler[2];
-  int index3 = shuffler[3];
-  // Shuffle a row of ARGB.
-  int x;
-  for (x = 0; x < pix; ++x) {
-    // To support in-place conversion.
-    uint8 b = src_argb[index0];
-    uint8 g = src_argb[index1];
-    uint8 r = src_argb[index2];
-    uint8 a = src_argb[index3];
-    dst_argb[0] = b;
-    dst_argb[1] = g;
-    dst_argb[2] = r;
-    dst_argb[3] = a;
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-
-void I422ToYUY2Row_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_frame, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    dst_frame[0] = src_y[0];
-    dst_frame[1] = src_u[0];
-    dst_frame[2] = src_y[1];
-    dst_frame[3] = src_v[0];
-    dst_frame += 4;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-  }
-  if (width & 1) {
-    dst_frame[0] = src_y[0];
-    dst_frame[1] = src_u[0];
-    dst_frame[2] = src_y[0];  // duplicate last y
-    dst_frame[3] = src_v[0];
-  }
-}
-
-void I422ToUYVYRow_C(const uint8* src_y,
-                     const uint8* src_u,
-                     const uint8* src_v,
-                     uint8* dst_frame, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    dst_frame[0] = src_u[0];
-    dst_frame[1] = src_y[0];
-    dst_frame[2] = src_v[0];
-    dst_frame[3] = src_y[1];
-    dst_frame += 4;
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-  }
-  if (width & 1) {
-    dst_frame[0] = src_u[0];
-    dst_frame[1] = src_y[0];
-    dst_frame[2] = src_v[0];
-    dst_frame[3] = src_y[0];  // duplicate last y
-  }
-}
-
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
-// row_win.cc has asm version, but GCC uses 2 step wrapper.
-#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_u,
-                           const uint8* src_v,
-                           uint8* rgb_buf,
-                           int width) {
-  // Allocate a row of ARGB.
-  align_buffer_64(row, width * 4);
-  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
-  ARGBToRGB565Row_SSE2(row, rgb_buf, width);
-  free_aligned_buffer_64(row);
-}
-#endif  // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
-
-#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* rgb_buf,
-                             int width) {
-  // Allocate a row of ARGB.
-  align_buffer_64(row, width * 4);
-  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
-  ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
-  free_aligned_buffer_64(row);
-}
-
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
-                             const uint8* src_u,
-                             const uint8* src_v,
-                             uint8* rgb_buf,
-                             int width) {
-  // Allocate a row of ARGB.
-  align_buffer_64(row, width * 4);
-  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
-  ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
-  free_aligned_buffer_64(row);
-}
-
-void NV12ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_uv,
-                           uint8* dst_rgb565,
-                           int width) {
-  // Allocate a row of ARGB.
-  align_buffer_64(row, width * 4);
-  NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
-  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
-  free_aligned_buffer_64(row);
-}
-
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
-                           const uint8* src_vu,
-                           uint8* dst_rgb565,
-                           int width) {
-  // Allocate a row of ARGB.
-  align_buffer_64(row, width * 4);
-  NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
-  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
-  free_aligned_buffer_64(row);
-}
-
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
-                         uint8* dst_argb,
-                         int width) {
-  // Allocate a rows of yuv.
-  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
-  uint8* row_u = row_y + ((width + 63) & ~63);
-  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-  YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
-  YUY2ToYRow_SSE2(src_yuy2, row_y, width);
-  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
-  free_aligned_buffer_64(row_y);
-}
-
-void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
-                                   uint8* dst_argb,
-                                   int width) {
-  // Allocate a rows of yuv.
-  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
-  uint8* row_u = row_y + ((width + 63) & ~63);
-  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-  YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
-  YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
-  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
-  free_aligned_buffer_64(row_y);
-}
-
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
-                         uint8* dst_argb,
-                         int width) {
-  // Allocate a rows of yuv.
-  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
-  uint8* row_u = row_y + ((width + 63) & ~63);
-  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-  UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
-  UYVYToYRow_SSE2(src_uyvy, row_y, width);
-  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
-  free_aligned_buffer_64(row_y);
-}
-
-void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
-                                   uint8* dst_argb,
-                                   int width) {
-  // Allocate a rows of yuv.
-  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
-  uint8* row_u = row_y + ((width + 63) & ~63);
-  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
-  UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
-  UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
-  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
-  free_aligned_buffer_64(row_y);
-}
-
-#endif  // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-#endif  // !defined(LIBYUV_DISABLE_X86)
-
-void ARGBPolynomialRow_C(const uint8* src_argb,
-                         uint8* dst_argb, const float* poly,
-                         int width) {
-  int i;
-  for (i = 0; i < width; ++i) {
-    float b = (float)(src_argb[0]);
-    float g = (float)(src_argb[1]);
-    float r = (float)(src_argb[2]);
-    float a = (float)(src_argb[3]);
-    float b2 = b * b;
-    float g2 = g * g;
-    float r2 = r * r;
-    float a2 = a * a;
-    float db = poly[0] + poly[4] * b;
-    float dg = poly[1] + poly[5] * g;
-    float dr = poly[2] + poly[6] * r;
-    float da = poly[3] + poly[7] * a;
-    float b3 = b2 * b;
-    float g3 = g2 * g;
-    float r3 = r2 * r;
-    float a3 = a2 * a;
-    db += poly[8] * b2;
-    dg += poly[9] * g2;
-    dr += poly[10] * r2;
-    da += poly[11] * a2;
-    db += poly[12] * b3;
-    dg += poly[13] * g3;
-    dr += poly[14] * r3;
-    da += poly[15] * a3;
-
-    dst_argb[0] = Clamp((int32)(db));
-    dst_argb[1] = Clamp((int32)(dg));
-    dst_argb[2] = Clamp((int32)(dr));
-    dst_argb[3] = Clamp((int32)(da));
-    src_argb += 4;
-    dst_argb += 4;
-  }
-}
-
-void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
-                             const uint8* luma, uint32 lumacoeff) {
-  uint32 bc = lumacoeff & 0xff;
-  uint32 gc = (lumacoeff >> 8) & 0xff;
-  uint32 rc = (lumacoeff >> 16) & 0xff;
-
-  int i;
-  for (i = 0; i < width - 1; i += 2) {
-    // Luminance in rows, color values in columns.
-    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
-                           src_argb[2] * rc) & 0x7F00u) + luma;
-    const uint8* luma1;
-    dst_argb[0] = luma0[src_argb[0]];
-    dst_argb[1] = luma0[src_argb[1]];
-    dst_argb[2] = luma0[src_argb[2]];
-    dst_argb[3] = src_argb[3];
-    luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
-              src_argb[6] * rc) & 0x7F00u) + luma;
-    dst_argb[4] = luma1[src_argb[4]];
-    dst_argb[5] = luma1[src_argb[5]];
-    dst_argb[6] = luma1[src_argb[6]];
-    dst_argb[7] = src_argb[7];
-    src_argb += 8;
-    dst_argb += 8;
-  }
-  if (width & 1) {
-    // Luminance in rows, color values in columns.
-    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
-                           src_argb[2] * rc) & 0x7F00u) + luma;
-    dst_argb[0] = luma0[src_argb[0]];
-    dst_argb[1] = luma0[src_argb[1]];
-    dst_argb[2] = luma0[src_argb[2]];
-    dst_argb[3] = src_argb[3];
-  }
-}
-
-void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
-  int i;
-  for (i = 0; i < width - 1; i += 2) {
-    dst[3] = src[3];
-    dst[7] = src[7];
-    dst += 8;
-    src += 8;
-  }
-  if (width & 1) {
-    dst[3] = src[3];
-  }
-}
-
-void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
-  int i;
-  for (i = 0; i < width - 1; i += 2) {
-    dst[3] = src[0];
-    dst[7] = src[1];
-    dst += 8;
-    src += 2;
-  }
-  if (width & 1) {
-    dst[3] = src[0];
-  }
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc
deleted file mode 100755
index 4435c55c5c..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc
+++ /dev/null
@@ -1,991 +0,0 @@
-/*
- *  Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// The following are available on Mips platforms:
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
-
-#ifdef HAS_COPYROW_MIPS
-void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
-  __asm__ __volatile__ (
-    ".set      noreorder                         \n"
-    ".set      noat                              \n"
-    "slti      $at, %[count], 8                  \n"
-    "bne       $at ,$zero, $last8                \n"
-    "xor       $t8, %[src], %[dst]               \n"
-    "andi      $t8, $t8, 0x3                     \n"
-
-    "bne       $t8, $zero, unaligned             \n"
-    "negu      $a3, %[dst]                       \n"
-    // make dst/src aligned
-    "andi      $a3, $a3, 0x3                     \n"
-    "beq       $a3, $zero, $chk16w               \n"
-    // word-aligned now count is the remining bytes count
-    "subu     %[count], %[count], $a3            \n"
-
-    "lwr       $t8, 0(%[src])                    \n"
-    "addu      %[src], %[src], $a3               \n"
-    "swr       $t8, 0(%[dst])                    \n"
-    "addu      %[dst], %[dst], $a3               \n"
-
-    // Now the dst/src are mutually word-aligned with word-aligned addresses
-    "$chk16w:                                    \n"
-    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
-    // t8 is the byte count after 64-byte chunks
-    "beq       %[count], $t8, chk8w              \n"
-    // There will be at most 1 32-byte chunk after it
-    "subu      $a3, %[count], $t8                \n"  // the reminder
-    // Here a3 counts bytes in 16w chunks
-    "addu      $a3, %[dst], $a3                  \n"
-    // Now a3 is the final dst after 64-byte chunks
-    "addu      $t0, %[dst], %[count]             \n"
-    // t0 is the "past the end" address
-
-    // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
-    // the "t0-32" address
-    // This means: for x=128 the last "safe" a1 address is "t0-160"
-    // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
-    // we will use "pref 30,128(a1)", so "t0-160" is the limit
-    "subu      $t9, $t0, 160                     \n"
-    // t9 is the "last safe pref 30,128(a1)" address
-    "pref      0, 0(%[src])                      \n"  // first line of src
-    "pref      0, 32(%[src])                     \n"  // second line of src
-    "pref      0, 64(%[src])                     \n"
-    "pref      30, 32(%[dst])                    \n"
-    // In case the a1 > t9 don't use "pref 30" at all
-    "sgtu      $v1, %[dst], $t9                  \n"
-    "bgtz      $v1, $loop16w                     \n"
-    "nop                                         \n"
-    // otherwise, start with using pref30
-    "pref      30, 64(%[dst])                    \n"
-    "$loop16w:                                    \n"
-    "pref      0, 96(%[src])                     \n"
-    "lw        $t0, 0(%[src])                    \n"
-    "bgtz      $v1, $skip_pref30_96              \n"  // skip
-    "lw        $t1, 4(%[src])                    \n"
-    "pref      30, 96(%[dst])                    \n"  // continue
-    "$skip_pref30_96:                            \n"
-    "lw        $t2, 8(%[src])                    \n"
-    "lw        $t3, 12(%[src])                   \n"
-    "lw        $t4, 16(%[src])                   \n"
-    "lw        $t5, 20(%[src])                   \n"
-    "lw        $t6, 24(%[src])                   \n"
-    "lw        $t7, 28(%[src])                   \n"
-    "pref      0, 128(%[src])                    \n"
-    //  bring the next lines of src, addr 128
-    "sw        $t0, 0(%[dst])                    \n"
-    "sw        $t1, 4(%[dst])                    \n"
-    "sw        $t2, 8(%[dst])                    \n"
-    "sw        $t3, 12(%[dst])                   \n"
-    "sw        $t4, 16(%[dst])                   \n"
-    "sw        $t5, 20(%[dst])                   \n"
-    "sw        $t6, 24(%[dst])                   \n"
-    "sw        $t7, 28(%[dst])                   \n"
-    "lw        $t0, 32(%[src])                   \n"
-    "bgtz      $v1, $skip_pref30_128             \n"  // skip pref 30,128(a1)
-    "lw        $t1, 36(%[src])                   \n"
-    "pref      30, 128(%[dst])                   \n"  // set dest, addr 128
-    "$skip_pref30_128:                           \n"
-    "lw        $t2, 40(%[src])                   \n"
-    "lw        $t3, 44(%[src])                   \n"
-    "lw        $t4, 48(%[src])                   \n"
-    "lw        $t5, 52(%[src])                   \n"
-    "lw        $t6, 56(%[src])                   \n"
-    "lw        $t7, 60(%[src])                   \n"
-    "pref      0, 160(%[src])                    \n"
-    // bring the next lines of src, addr 160
-    "sw        $t0, 32(%[dst])                   \n"
-    "sw        $t1, 36(%[dst])                   \n"
-    "sw        $t2, 40(%[dst])                   \n"
-    "sw        $t3, 44(%[dst])                   \n"
-    "sw        $t4, 48(%[dst])                   \n"
-    "sw        $t5, 52(%[dst])                   \n"
-    "sw        $t6, 56(%[dst])                   \n"
-    "sw        $t7, 60(%[dst])                   \n"
-
-    "addiu     %[dst], %[dst], 64                \n"  // adding 64 to dest
-    "sgtu      $v1, %[dst], $t9                  \n"
-    "bne       %[dst], $a3, $loop16w             \n"
-    " addiu    %[src], %[src], 64                \n"  // adding 64 to src
-    "move      %[count], $t8                     \n"
-
-    // Here we have src and dest word-aligned but less than 64-bytes to go
-
-    "chk8w:                                      \n"
-    "pref      0, 0x0(%[src])                    \n"
-    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
-    // the t8 is the reminder count past 32-bytes
-    "beq       %[count], $t8, chk1w              \n"
-    // count=t8,no 32-byte chunk
-    " nop                                        \n"
-
-    "lw        $t0, 0(%[src])                    \n"
-    "lw        $t1, 4(%[src])                    \n"
-    "lw        $t2, 8(%[src])                    \n"
-    "lw        $t3, 12(%[src])                   \n"
-    "lw        $t4, 16(%[src])                   \n"
-    "lw        $t5, 20(%[src])                   \n"
-    "lw        $t6, 24(%[src])                   \n"
-    "lw        $t7, 28(%[src])                   \n"
-    "addiu     %[src], %[src], 32                \n"
-
-    "sw        $t0, 0(%[dst])                    \n"
-    "sw        $t1, 4(%[dst])                    \n"
-    "sw        $t2, 8(%[dst])                    \n"
-    "sw        $t3, 12(%[dst])                   \n"
-    "sw        $t4, 16(%[dst])                   \n"
-    "sw        $t5, 20(%[dst])                   \n"
-    "sw        $t6, 24(%[dst])                   \n"
-    "sw        $t7, 28(%[dst])                   \n"
-    "addiu     %[dst], %[dst], 32                \n"
-
-    "chk1w:                                      \n"
-    "andi      %[count], $t8, 0x3                \n"
-    // now count is the reminder past 1w chunks
-    "beq       %[count], $t8, $last8             \n"
-    " subu     $a3, $t8, %[count]                \n"
-    // a3 is count of bytes in 1w chunks
-    "addu      $a3, %[dst], $a3                  \n"
-    // now a3 is the dst address past the 1w chunks
-    // copying in words (4-byte chunks)
-    "$wordCopy_loop:                             \n"
-    "lw        $t3, 0(%[src])                    \n"
-    // the first t3 may be equal t0 ... optimize?
-    "addiu     %[src], %[src],4                  \n"
-    "addiu     %[dst], %[dst],4                  \n"
-    "bne       %[dst], $a3,$wordCopy_loop        \n"
-    " sw       $t3, -4(%[dst])                   \n"
-
-    // For the last (<8) bytes
-    "$last8:                                     \n"
-    "blez      %[count], leave                   \n"
-    " addu     $a3, %[dst], %[count]             \n"  // a3 -last dst address
-    "$last8loop:                                 \n"
-    "lb        $v1, 0(%[src])                    \n"
-    "addiu     %[src], %[src], 1                 \n"
-    "addiu     %[dst], %[dst], 1                 \n"
-    "bne       %[dst], $a3, $last8loop           \n"
-    " sb       $v1, -1(%[dst])                   \n"
-
-    "leave:                                      \n"
-    "  j       $ra                               \n"
-    "  nop                                       \n"
-
-    //
-    // UNALIGNED case
-    //
-
-    "unaligned:                                  \n"
-    // got here with a3="negu a1"
-    "andi      $a3, $a3, 0x3                     \n"  // a1 is word aligned?
-    "beqz      $a3, $ua_chk16w                   \n"
-    " subu     %[count], %[count], $a3           \n"
-    // bytes left after initial a3 bytes
-    "lwr       $v1, 0(%[src])                    \n"
-    "lwl       $v1, 3(%[src])                    \n"
-    "addu      %[src], %[src], $a3               \n"  // a3 may be 1, 2 or 3
-    "swr       $v1, 0(%[dst])                    \n"
-    "addu      %[dst], %[dst], $a3               \n"
-    // below the dst will be word aligned (NOTE1)
-    "$ua_chk16w:                                 \n"
-    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
-    // t8 is the byte count after 64-byte chunks
-    "beq       %[count], $t8, ua_chk8w           \n"
-    // if a2==t8, no 64-byte chunks
-    // There will be at most 1 32-byte chunk after it
-    "subu      $a3, %[count], $t8                \n"  // the reminder
-    // Here a3 counts bytes in 16w chunks
-    "addu      $a3, %[dst], $a3                  \n"
-    // Now a3 is the final dst after 64-byte chunks
-    "addu      $t0, %[dst], %[count]             \n"  // t0 "past the end"
-    "subu      $t9, $t0, 160                     \n"
-    // t9 is the "last safe pref 30,128(a1)" address
-    "pref      0, 0(%[src])                      \n"  // first line of src
-    "pref      0, 32(%[src])                     \n"  // second line  addr 32
-    "pref      0, 64(%[src])                     \n"
-    "pref      30, 32(%[dst])                    \n"
-    // safe, as we have at least 64 bytes ahead
-    // In case the a1 > t9 don't use "pref 30" at all
-    "sgtu      $v1, %[dst], $t9                  \n"
-    "bgtz      $v1, $ua_loop16w                  \n"
-    // skip "pref 30,64(a1)" for too short arrays
-    " nop                                        \n"
-    // otherwise, start with using pref30
-    "pref      30, 64(%[dst])                    \n"
-    "$ua_loop16w:                                \n"
-    "pref      0, 96(%[src])                     \n"
-    "lwr       $t0, 0(%[src])                    \n"
-    "lwl       $t0, 3(%[src])                    \n"
-    "lwr       $t1, 4(%[src])                    \n"
-    "bgtz      $v1, $ua_skip_pref30_96           \n"
-    " lwl      $t1, 7(%[src])                    \n"
-    "pref      30, 96(%[dst])                    \n"
-    // continue setting up the dest, addr 96
-    "$ua_skip_pref30_96:                         \n"
-    "lwr       $t2, 8(%[src])                    \n"
-    "lwl       $t2, 11(%[src])                   \n"
-    "lwr       $t3, 12(%[src])                   \n"
-    "lwl       $t3, 15(%[src])                   \n"
-    "lwr       $t4, 16(%[src])                   \n"
-    "lwl       $t4, 19(%[src])                   \n"
-    "lwr       $t5, 20(%[src])                   \n"
-    "lwl       $t5, 23(%[src])                   \n"
-    "lwr       $t6, 24(%[src])                   \n"
-    "lwl       $t6, 27(%[src])                   \n"
-    "lwr       $t7, 28(%[src])                   \n"
-    "lwl       $t7, 31(%[src])                   \n"
-    "pref      0, 128(%[src])                    \n"
-    // bring the next lines of src, addr 128
-    "sw        $t0, 0(%[dst])                    \n"
-    "sw        $t1, 4(%[dst])                    \n"
-    "sw        $t2, 8(%[dst])                    \n"
-    "sw        $t3, 12(%[dst])                   \n"
-    "sw        $t4, 16(%[dst])                   \n"
-    "sw        $t5, 20(%[dst])                   \n"
-    "sw        $t6, 24(%[dst])                   \n"
-    "sw        $t7, 28(%[dst])                   \n"
-    "lwr       $t0, 32(%[src])                   \n"
-    "lwl       $t0, 35(%[src])                   \n"
-    "lwr       $t1, 36(%[src])                   \n"
-    "bgtz      $v1, ua_skip_pref30_128           \n"
-    " lwl      $t1, 39(%[src])                   \n"
-    "pref      30, 128(%[dst])                   \n"
-    // continue setting up the dest, addr 128
-    "ua_skip_pref30_128:                         \n"
-
-    "lwr       $t2, 40(%[src])                   \n"
-    "lwl       $t2, 43(%[src])                   \n"
-    "lwr       $t3, 44(%[src])                   \n"
-    "lwl       $t3, 47(%[src])                   \n"
-    "lwr       $t4, 48(%[src])                   \n"
-    "lwl       $t4, 51(%[src])                   \n"
-    "lwr       $t5, 52(%[src])                   \n"
-    "lwl       $t5, 55(%[src])                   \n"
-    "lwr       $t6, 56(%[src])                   \n"
-    "lwl       $t6, 59(%[src])                   \n"
-    "lwr       $t7, 60(%[src])                   \n"
-    "lwl       $t7, 63(%[src])                   \n"
-    "pref      0, 160(%[src])                    \n"
-    // bring the next lines of src, addr 160
-    "sw        $t0, 32(%[dst])                   \n"
-    "sw        $t1, 36(%[dst])                   \n"
-    "sw        $t2, 40(%[dst])                   \n"
-    "sw        $t3, 44(%[dst])                   \n"
-    "sw        $t4, 48(%[dst])                   \n"
-    "sw        $t5, 52(%[dst])                   \n"
-    "sw        $t6, 56(%[dst])                   \n"
-    "sw        $t7, 60(%[dst])                   \n"
-
-    "addiu     %[dst],%[dst],64                  \n"  // adding 64 to dest
-    "sgtu      $v1,%[dst],$t9                    \n"
-    "bne       %[dst],$a3,$ua_loop16w            \n"
-    " addiu    %[src],%[src],64                  \n"  // adding 64 to src
-    "move      %[count],$t8                      \n"
-
-    // Here we have src and dest word-aligned but less than 64-bytes to go
-
-    "ua_chk8w:                                   \n"
-    "pref      0, 0x0(%[src])                    \n"
-    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
-    // the t8 is the reminder count
-    "beq       %[count], $t8, $ua_chk1w          \n"
-    // when count==t8, no 32-byte chunk
-
-    "lwr       $t0, 0(%[src])                    \n"
-    "lwl       $t0, 3(%[src])                    \n"
-    "lwr       $t1, 4(%[src])                    \n"
-    "lwl       $t1, 7(%[src])                    \n"
-    "lwr       $t2, 8(%[src])                    \n"
-    "lwl       $t2, 11(%[src])                   \n"
-    "lwr       $t3, 12(%[src])                   \n"
-    "lwl       $t3, 15(%[src])                   \n"
-    "lwr       $t4, 16(%[src])                   \n"
-    "lwl       $t4, 19(%[src])                   \n"
-    "lwr       $t5, 20(%[src])                   \n"
-    "lwl       $t5, 23(%[src])                   \n"
-    "lwr       $t6, 24(%[src])                   \n"
-    "lwl       $t6, 27(%[src])                   \n"
-    "lwr       $t7, 28(%[src])                   \n"
-    "lwl       $t7, 31(%[src])                   \n"
-    "addiu     %[src], %[src], 32                \n"
-
-    "sw        $t0, 0(%[dst])                    \n"
-    "sw        $t1, 4(%[dst])                    \n"
-    "sw        $t2, 8(%[dst])                    \n"
-    "sw        $t3, 12(%[dst])                   \n"
-    "sw        $t4, 16(%[dst])                   \n"
-    "sw        $t5, 20(%[dst])                   \n"
-    "sw        $t6, 24(%[dst])                   \n"
-    "sw        $t7, 28(%[dst])                   \n"
-    "addiu     %[dst], %[dst], 32                \n"
-
-    "$ua_chk1w:                                  \n"
-    "andi      %[count], $t8, 0x3                \n"
-    // now count is the reminder past 1w chunks
-    "beq       %[count], $t8, ua_smallCopy       \n"
-    "subu      $a3, $t8, %[count]                \n"
-    // a3 is count of bytes in 1w chunks
-    "addu      $a3, %[dst], $a3                  \n"
-    // now a3 is the dst address past the 1w chunks
-
-    // copying in words (4-byte chunks)
-    "$ua_wordCopy_loop:                          \n"
-    "lwr       $v1, 0(%[src])                    \n"
-    "lwl       $v1, 3(%[src])                    \n"
-    "addiu     %[src], %[src], 4                 \n"
-    "addiu     %[dst], %[dst], 4                 \n"
-    // note: dst=a1 is word aligned here, see NOTE1
-    "bne       %[dst], $a3, $ua_wordCopy_loop    \n"
-    " sw       $v1,-4(%[dst])                    \n"
-
-    // Now less than 4 bytes (value in count) left to copy
-    "ua_smallCopy:                               \n"
-    "beqz      %[count], leave                   \n"
-    " addu     $a3, %[dst], %[count]             \n" // a3 = last dst address
-    "$ua_smallCopy_loop:                         \n"
-    "lb        $v1, 0(%[src])                    \n"
-    "addiu     %[src], %[src], 1                 \n"
-    "addiu     %[dst], %[dst], 1                 \n"
-    "bne       %[dst],$a3,$ua_smallCopy_loop     \n"
-    " sb       $v1, -1(%[dst])                   \n"
-
-    "j         $ra                               \n"
-    " nop                                        \n"
-    ".set      at                                \n"
-    ".set      reorder                           \n"
-       : [dst] "+r" (dst), [src] "+r" (src)
-       : [count] "r" (count)
-       : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
-       "t8", "t9", "a3", "v1", "at"
-  );
-}
-#endif  // HAS_COPYROW_MIPS
-
-// MIPS DSPR2 functions
-#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
-    (__mips_dsp_rev >= 2)
-void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                           int width) {
-  __asm__ __volatile__ (
-    ".set push                                     \n"
-    ".set noreorder                                \n"
-    "srl             $t4, %[width], 4              \n"  // multiplies of 16
-    "blez            $t4, 2f                       \n"
-    " andi           %[width], %[width], 0xf       \n"  // residual
-
-    ".p2align        2                             \n"
-  "1:                                              \n"
-    "addiu           $t4, $t4, -1                  \n"
-    "lw              $t0, 0(%[src_uv])             \n"  // V1 | U1 | V0 | U0
-    "lw              $t1, 4(%[src_uv])             \n"  // V3 | U3 | V2 | U2
-    "lw              $t2, 8(%[src_uv])             \n"  // V5 | U5 | V4 | U4
-    "lw              $t3, 12(%[src_uv])            \n"  // V7 | U7 | V6 | U6
-    "lw              $t5, 16(%[src_uv])            \n"  // V9 | U9 | V8 | U8
-    "lw              $t6, 20(%[src_uv])            \n"  // V11 | U11 | V10 | U10
-    "lw              $t7, 24(%[src_uv])            \n"  // V13 | U13 | V12 | U12
-    "lw              $t8, 28(%[src_uv])            \n"  // V15 | U15 | V14 | U14
-    "addiu           %[src_uv], %[src_uv], 32      \n"
-    "precrq.qb.ph    $t9, $t1, $t0                 \n"  // V3 | V2 | V1 | V0
-    "precr.qb.ph     $t0, $t1, $t0                 \n"  // U3 | U2 | U1 | U0
-    "precrq.qb.ph    $t1, $t3, $t2                 \n"  // V7 | V6 | V5 | V4
-    "precr.qb.ph     $t2, $t3, $t2                 \n"  // U7 | U6 | U5 | U4
-    "precrq.qb.ph    $t3, $t6, $t5                 \n"  // V11 | V10 | V9 | V8
-    "precr.qb.ph     $t5, $t6, $t5                 \n"  // U11 | U10 | U9 | U8
-    "precrq.qb.ph    $t6, $t8, $t7                 \n"  // V15 | V14 | V13 | V12
-    "precr.qb.ph     $t7, $t8, $t7                 \n"  // U15 | U14 | U13 | U12
-    "sw              $t9, 0(%[dst_v])              \n"
-    "sw              $t0, 0(%[dst_u])              \n"
-    "sw              $t1, 4(%[dst_v])              \n"
-    "sw              $t2, 4(%[dst_u])              \n"
-    "sw              $t3, 8(%[dst_v])              \n"
-    "sw              $t5, 8(%[dst_u])              \n"
-    "sw              $t6, 12(%[dst_v])             \n"
-    "sw              $t7, 12(%[dst_u])             \n"
-    "addiu           %[dst_v], %[dst_v], 16        \n"
-    "bgtz            $t4, 1b                       \n"
-    " addiu          %[dst_u], %[dst_u], 16        \n"
-
-    "beqz            %[width], 3f                  \n"
-    " nop                                          \n"
-
-  "2:                                              \n"
-    "lbu             $t0, 0(%[src_uv])             \n"
-    "lbu             $t1, 1(%[src_uv])             \n"
-    "addiu           %[src_uv], %[src_uv], 2       \n"
-    "addiu           %[width], %[width], -1        \n"
-    "sb              $t0, 0(%[dst_u])              \n"
-    "sb              $t1, 0(%[dst_v])              \n"
-    "addiu           %[dst_u], %[dst_u], 1         \n"
-    "bgtz            %[width], 2b                  \n"
-    " addiu          %[dst_v], %[dst_v], 1         \n"
-
-  "3:                                              \n"
-    ".set pop                                      \n"
-     : [src_uv] "+r" (src_uv),
-       [width] "+r" (width),
-       [dst_u] "+r" (dst_u),
-       [dst_v] "+r" (dst_v)
-     :
-     : "t0", "t1", "t2", "t3",
-     "t4", "t5", "t6", "t7", "t8", "t9"
-  );
-}
-
-void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
-                                     uint8* dst_v, int width) {
-  __asm__ __volatile__ (
-    ".set push                                     \n"
-    ".set noreorder                                \n"
-    "srl             $t4, %[width], 4              \n"  // multiplies of 16
-    "blez            $t4, 2f                       \n"
-    " andi           %[width], %[width], 0xf       \n"  // residual
-
-    ".p2align        2                             \n"
-  "1:                                              \n"
-    "addiu           $t4, $t4, -1                  \n"
-    "lwr             $t0, 0(%[src_uv])             \n"
-    "lwl             $t0, 3(%[src_uv])             \n"  // V1 | U1 | V0 | U0
-    "lwr             $t1, 4(%[src_uv])             \n"
-    "lwl             $t1, 7(%[src_uv])             \n"  // V3 | U3 | V2 | U2
-    "lwr             $t2, 8(%[src_uv])             \n"
-    "lwl             $t2, 11(%[src_uv])            \n"  // V5 | U5 | V4 | U4
-    "lwr             $t3, 12(%[src_uv])            \n"
-    "lwl             $t3, 15(%[src_uv])            \n"  // V7 | U7 | V6 | U6
-    "lwr             $t5, 16(%[src_uv])            \n"
-    "lwl             $t5, 19(%[src_uv])            \n"  // V9 | U9 | V8 | U8
-    "lwr             $t6, 20(%[src_uv])            \n"
-    "lwl             $t6, 23(%[src_uv])            \n"  // V11 | U11 | V10 | U10
-    "lwr             $t7, 24(%[src_uv])            \n"
-    "lwl             $t7, 27(%[src_uv])            \n"  // V13 | U13 | V12 | U12
-    "lwr             $t8, 28(%[src_uv])            \n"
-    "lwl             $t8, 31(%[src_uv])            \n"  // V15 | U15 | V14 | U14
-    "precrq.qb.ph    $t9, $t1, $t0                 \n"  // V3 | V2 | V1 | V0
-    "precr.qb.ph     $t0, $t1, $t0                 \n"  // U3 | U2 | U1 | U0
-    "precrq.qb.ph    $t1, $t3, $t2                 \n"  // V7 | V6 | V5 | V4
-    "precr.qb.ph     $t2, $t3, $t2                 \n"  // U7 | U6 | U5 | U4
-    "precrq.qb.ph    $t3, $t6, $t5                 \n"  // V11 | V10 | V9 | V8
-    "precr.qb.ph     $t5, $t6, $t5                 \n"  // U11 | U10 | U9 | U8
-    "precrq.qb.ph    $t6, $t8, $t7                 \n"  // V15 | V14 | V13 | V12
-    "precr.qb.ph     $t7, $t8, $t7                 \n"  // U15 | U14 | U13 | U12
-    "addiu           %[src_uv], %[src_uv], 32      \n"
-    "swr             $t9, 0(%[dst_v])              \n"
-    "swl             $t9, 3(%[dst_v])              \n"
-    "swr             $t0, 0(%[dst_u])              \n"
-    "swl             $t0, 3(%[dst_u])              \n"
-    "swr             $t1, 4(%[dst_v])              \n"
-    "swl             $t1, 7(%[dst_v])              \n"
-    "swr             $t2, 4(%[dst_u])              \n"
-    "swl             $t2, 7(%[dst_u])              \n"
-    "swr             $t3, 8(%[dst_v])              \n"
-    "swl             $t3, 11(%[dst_v])             \n"
-    "swr             $t5, 8(%[dst_u])              \n"
-    "swl             $t5, 11(%[dst_u])             \n"
-    "swr             $t6, 12(%[dst_v])             \n"
-    "swl             $t6, 15(%[dst_v])             \n"
-    "swr             $t7, 12(%[dst_u])             \n"
-    "swl             $t7, 15(%[dst_u])             \n"
-    "addiu           %[dst_u], %[dst_u], 16        \n"
-    "bgtz            $t4, 1b                       \n"
-    " addiu          %[dst_v], %[dst_v], 16        \n"
-
-    "beqz            %[width], 3f                  \n"
-    " nop                                          \n"
-
-  "2:                                              \n"
-    "lbu             $t0, 0(%[src_uv])             \n"
-    "lbu             $t1, 1(%[src_uv])             \n"
-    "addiu           %[src_uv], %[src_uv], 2       \n"
-    "addiu           %[width], %[width], -1        \n"
-    "sb              $t0, 0(%[dst_u])              \n"
-    "sb              $t1, 0(%[dst_v])              \n"
-    "addiu           %[dst_u], %[dst_u], 1         \n"
-    "bgtz            %[width], 2b                  \n"
-    " addiu          %[dst_v], %[dst_v], 1         \n"
-
-  "3:                                              \n"
-    ".set pop                                      \n"
-     : [src_uv] "+r" (src_uv),
-       [width] "+r" (width),
-       [dst_u] "+r" (dst_u),
-       [dst_v] "+r" (dst_v)
-     :
-     : "t0", "t1", "t2", "t3",
-     "t4", "t5", "t6", "t7", "t8", "t9"
-  );
-}
-
-void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
-  __asm__ __volatile__ (
-    ".set push                             \n"
-    ".set noreorder                        \n"
-
-    "srl       $t4, %[width], 4            \n"  // multiplies of 16
-    "andi      $t5, %[width], 0xf          \n"
-    "blez      $t4, 2f                     \n"
-    " addu     %[src], %[src], %[width]    \n"  // src += width
-
-    ".p2align  2                           \n"
-   "1:                                     \n"
-    "lw        $t0, -16(%[src])            \n"  // |3|2|1|0|
-    "lw        $t1, -12(%[src])            \n"  // |7|6|5|4|
-    "lw        $t2, -8(%[src])             \n"  // |11|10|9|8|
-    "lw        $t3, -4(%[src])             \n"  // |15|14|13|12|
-    "wsbh      $t0, $t0                    \n"  // |2|3|0|1|
-    "wsbh      $t1, $t1                    \n"  // |6|7|4|5|
-    "wsbh      $t2, $t2                    \n"  // |10|11|8|9|
-    "wsbh      $t3, $t3                    \n"  // |14|15|12|13|
-    "rotr      $t0, $t0, 16                \n"  // |0|1|2|3|
-    "rotr      $t1, $t1, 16                \n"  // |4|5|6|7|
-    "rotr      $t2, $t2, 16                \n"  // |8|9|10|11|
-    "rotr      $t3, $t3, 16                \n"  // |12|13|14|15|
-    "addiu     %[src], %[src], -16         \n"
-    "addiu     $t4, $t4, -1                \n"
-    "sw        $t3, 0(%[dst])              \n"  // |15|14|13|12|
-    "sw        $t2, 4(%[dst])              \n"  // |11|10|9|8|
-    "sw        $t1, 8(%[dst])              \n"  // |7|6|5|4|
-    "sw        $t0, 12(%[dst])             \n"  // |3|2|1|0|
-    "bgtz      $t4, 1b                     \n"
-    " addiu    %[dst], %[dst], 16          \n"
-    "beqz      $t5, 3f                     \n"
-    " nop                                  \n"
-
-   "2:                                     \n"
-    "lbu       $t0, -1(%[src])             \n"
-    "addiu     $t5, $t5, -1                \n"
-    "addiu     %[src], %[src], -1          \n"
-    "sb        $t0, 0(%[dst])              \n"
-    "bgez      $t5, 2b                     \n"
-    " addiu    %[dst], %[dst], 1           \n"
-
-   "3:                                     \n"
-    ".set pop                              \n"
-      : [src] "+r" (src), [dst] "+r" (dst)
-      : [width] "r" (width)
-      : "t0", "t1", "t2", "t3", "t4", "t5"
-  );
-}
-
-void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int width) {
-  int x = 0;
-  int y = 0;
-  __asm__ __volatile__ (
-    ".set push                                    \n"
-    ".set noreorder                               \n"
-
-    "addu            $t4, %[width], %[width]      \n"
-    "srl             %[x], %[width], 4            \n"
-    "andi            %[y], %[width], 0xf          \n"
-    "blez            %[x], 2f                     \n"
-    " addu           %[src_uv], %[src_uv], $t4    \n"
-
-    ".p2align        2                            \n"
-   "1:                                            \n"
-    "lw              $t0, -32(%[src_uv])          \n"  // |3|2|1|0|
-    "lw              $t1, -28(%[src_uv])          \n"  // |7|6|5|4|
-    "lw              $t2, -24(%[src_uv])          \n"  // |11|10|9|8|
-    "lw              $t3, -20(%[src_uv])          \n"  // |15|14|13|12|
-    "lw              $t4, -16(%[src_uv])          \n"  // |19|18|17|16|
-    "lw              $t6, -12(%[src_uv])          \n"  // |23|22|21|20|
-    "lw              $t7, -8(%[src_uv])           \n"  // |27|26|25|24|
-    "lw              $t8, -4(%[src_uv])           \n"  // |31|30|29|28|
-
-    "rotr            $t0, $t0, 16                 \n"  // |1|0|3|2|
-    "rotr            $t1, $t1, 16                 \n"  // |5|4|7|6|
-    "rotr            $t2, $t2, 16                 \n"  // |9|8|11|10|
-    "rotr            $t3, $t3, 16                 \n"  // |13|12|15|14|
-    "rotr            $t4, $t4, 16                 \n"  // |17|16|19|18|
-    "rotr            $t6, $t6, 16                 \n"  // |21|20|23|22|
-    "rotr            $t7, $t7, 16                 \n"  // |25|24|27|26|
-    "rotr            $t8, $t8, 16                 \n"  // |29|28|31|30|
-    "precr.qb.ph     $t9, $t0, $t1                \n"  // |0|2|4|6|
-    "precrq.qb.ph    $t5, $t0, $t1                \n"  // |1|3|5|7|
-    "precr.qb.ph     $t0, $t2, $t3                \n"  // |8|10|12|14|
-    "precrq.qb.ph    $t1, $t2, $t3                \n"  // |9|11|13|15|
-    "precr.qb.ph     $t2, $t4, $t6                \n"  // |16|18|20|22|
-    "precrq.qb.ph    $t3, $t4, $t6                \n"  // |17|19|21|23|
-    "precr.qb.ph     $t4, $t7, $t8                \n"  // |24|26|28|30|
-    "precrq.qb.ph    $t6, $t7, $t8                \n"  // |25|27|29|31|
-    "addiu           %[src_uv], %[src_uv], -32    \n"
-    "addiu           %[x], %[x], -1               \n"
-    "swr             $t4, 0(%[dst_u])             \n"
-    "swl             $t4, 3(%[dst_u])             \n"  // |30|28|26|24|
-    "swr             $t6, 0(%[dst_v])             \n"
-    "swl             $t6, 3(%[dst_v])             \n"  // |31|29|27|25|
-    "swr             $t2, 4(%[dst_u])             \n"
-    "swl             $t2, 7(%[dst_u])             \n"  // |22|20|18|16|
-    "swr             $t3, 4(%[dst_v])             \n"
-    "swl             $t3, 7(%[dst_v])             \n"  // |23|21|19|17|
-    "swr             $t0, 8(%[dst_u])             \n"
-    "swl             $t0, 11(%[dst_u])            \n"  // |14|12|10|8|
-    "swr             $t1, 8(%[dst_v])             \n"
-    "swl             $t1, 11(%[dst_v])            \n"  // |15|13|11|9|
-    "swr             $t9, 12(%[dst_u])            \n"
-    "swl             $t9, 15(%[dst_u])            \n"  // |6|4|2|0|
-    "swr             $t5, 12(%[dst_v])            \n"
-    "swl             $t5, 15(%[dst_v])            \n"  // |7|5|3|1|
-    "addiu           %[dst_v], %[dst_v], 16       \n"
-    "bgtz            %[x], 1b                     \n"
-    " addiu          %[dst_u], %[dst_u], 16       \n"
-    "beqz            %[y], 3f                     \n"
-    " nop                                         \n"
-    "b               2f                           \n"
-    " nop                                         \n"
-
-   "2:                                            \n"
-    "lbu             $t0, -2(%[src_uv])           \n"
-    "lbu             $t1, -1(%[src_uv])           \n"
-    "addiu           %[src_uv], %[src_uv], -2     \n"
-    "addiu           %[y], %[y], -1               \n"
-    "sb              $t0, 0(%[dst_u])             \n"
-    "sb              $t1, 0(%[dst_v])             \n"
-    "addiu           %[dst_u], %[dst_u], 1        \n"
-    "bgtz            %[y], 2b                     \n"
-    " addiu          %[dst_v], %[dst_v], 1        \n"
-
-   "3:                                            \n"
-    ".set pop                                     \n"
-      : [src_uv] "+r" (src_uv),
-        [dst_u] "+r" (dst_u),
-        [dst_v] "+r" (dst_v),
-        [x] "=&r" (x),
-        [y] "+r" (y)
-      : [width] "r" (width)
-      : "t0", "t1", "t2", "t3", "t4",
-      "t5", "t7", "t8", "t9"
-  );
-}
-
-// Convert (4 Y and 2 VU) I422 and arrange RGB values into
-// t5 = | 0 | B0 | 0 | b0 |
-// t4 = | 0 | B1 | 0 | b1 |
-// t9 = | 0 | G0 | 0 | g0 |
-// t8 = | 0 | G1 | 0 | g1 |
-// t2 = | 0 | R0 | 0 | r0 |
-// t1 = | 0 | R1 | 0 | r1 |
-#define I422ToTransientMipsRGB                                                 \
-      "lw                $t0, 0(%[y_buf])       \n"                            \
-      "lhu               $t1, 0(%[u_buf])       \n"                            \
-      "lhu               $t2, 0(%[v_buf])       \n"                            \
-      "preceu.ph.qbr     $t1, $t1               \n"                            \
-      "preceu.ph.qbr     $t2, $t2               \n"                            \
-      "preceu.ph.qbra    $t3, $t0               \n"                            \
-      "preceu.ph.qbla    $t0, $t0               \n"                            \
-      "subu.ph           $t1, $t1, $s5          \n"                            \
-      "subu.ph           $t2, $t2, $s5          \n"                            \
-      "subu.ph           $t3, $t3, $s4          \n"                            \
-      "subu.ph           $t0, $t0, $s4          \n"                            \
-      "mul.ph            $t3, $t3, $s0          \n"                            \
-      "mul.ph            $t0, $t0, $s0          \n"                            \
-      "shll.ph           $t4, $t1, 0x7          \n"                            \
-      "subu.ph           $t4, $t4, $t1          \n"                            \
-      "mul.ph            $t6, $t1, $s1          \n"                            \
-      "mul.ph            $t1, $t2, $s2          \n"                            \
-      "addq_s.ph         $t5, $t4, $t3          \n"                            \
-      "addq_s.ph         $t4, $t4, $t0          \n"                            \
-      "shra.ph           $t5, $t5, 6            \n"                            \
-      "shra.ph           $t4, $t4, 6            \n"                            \
-      "addiu             %[u_buf], 2            \n"                            \
-      "addiu             %[v_buf], 2            \n"                            \
-      "addu.ph           $t6, $t6, $t1          \n"                            \
-      "mul.ph            $t1, $t2, $s3          \n"                            \
-      "addu.ph           $t9, $t6, $t3          \n"                            \
-      "addu.ph           $t8, $t6, $t0          \n"                            \
-      "shra.ph           $t9, $t9, 6            \n"                            \
-      "shra.ph           $t8, $t8, 6            \n"                            \
-      "addu.ph           $t2, $t1, $t3          \n"                            \
-      "addu.ph           $t1, $t1, $t0          \n"                            \
-      "shra.ph           $t2, $t2, 6            \n"                            \
-      "shra.ph           $t1, $t1, 6            \n"                            \
-      "subu.ph           $t5, $t5, $s5          \n"                            \
-      "subu.ph           $t4, $t4, $s5          \n"                            \
-      "subu.ph           $t9, $t9, $s5          \n"                            \
-      "subu.ph           $t8, $t8, $s5          \n"                            \
-      "subu.ph           $t2, $t2, $s5          \n"                            \
-      "subu.ph           $t1, $t1, $s5          \n"                            \
-      "shll_s.ph         $t5, $t5, 8            \n"                            \
-      "shll_s.ph         $t4, $t4, 8            \n"                            \
-      "shll_s.ph         $t9, $t9, 8            \n"                            \
-      "shll_s.ph         $t8, $t8, 8            \n"                            \
-      "shll_s.ph         $t2, $t2, 8            \n"                            \
-      "shll_s.ph         $t1, $t1, 8            \n"                            \
-      "shra.ph           $t5, $t5, 8            \n"                            \
-      "shra.ph           $t4, $t4, 8            \n"                            \
-      "shra.ph           $t9, $t9, 8            \n"                            \
-      "shra.ph           $t8, $t8, 8            \n"                            \
-      "shra.ph           $t2, $t2, 8            \n"                            \
-      "shra.ph           $t1, $t1, 8            \n"                            \
-      "addu.ph           $t5, $t5, $s5          \n"                            \
-      "addu.ph           $t4, $t4, $s5          \n"                            \
-      "addu.ph           $t9, $t9, $s5          \n"                            \
-      "addu.ph           $t8, $t8, $s5          \n"                            \
-      "addu.ph           $t2, $t2, $s5          \n"                            \
-      "addu.ph           $t1, $t1, $s5          \n"
-
-void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  __asm__ __volatile__ (
-    ".set push                                \n"
-    ".set noreorder                           \n"
-    "beqz              %[width], 2f           \n"
-    " repl.ph          $s0, 74                \n"  // |YG|YG| = |74|74|
-    "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25|
-    "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52|
-    "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102|
-    "repl.ph           $s4, 16                \n"  // |0|16|0|16|
-    "repl.ph           $s5, 128               \n"  // |128|128| // clipping
-    "lui               $s6, 0xff00            \n"
-    "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|ff|
-
-    ".p2align          2                      \n"
-   "1:                                        \n"
-      I422ToTransientMipsRGB
-// Arranging into argb format
-    "precr.qb.ph       $t4, $t8, $t4          \n"  // |G1|g1|B1|b1|
-    "precr.qb.ph       $t5, $t9, $t5          \n"  // |G0|g0|B0|b0|
-    "addiu             %[width], -4           \n"
-    "precrq.qb.ph      $t8, $t4, $t5          \n"  // |G1|B1|G0|B0|
-    "precr.qb.ph       $t9, $t4, $t5          \n"  // |g1|b1|g0|b0|
-    "precr.qb.ph       $t2, $t1, $t2          \n"  // |R1|r1|R0|r0|
-
-    "addiu             %[y_buf], 4            \n"
-    "preceu.ph.qbla    $t1, $t2               \n"  // |0 |R1|0 |R0|
-    "preceu.ph.qbra    $t2, $t2               \n"  // |0 |r1|0 |r0|
-    "or                $t1, $t1, $s6          \n"  // |ff|R1|ff|R0|
-    "or                $t2, $t2, $s6          \n"  // |ff|r1|ff|r0|
-    "precrq.ph.w       $t0, $t2, $t9          \n"  // |ff|r1|g1|b1|
-    "precrq.ph.w       $t3, $t1, $t8          \n"  // |ff|R1|G1|B1|
-    "sll               $t9, $t9, 16           \n"
-    "sll               $t8, $t8, 16           \n"
-    "packrl.ph         $t2, $t2, $t9          \n"  // |ff|r0|g0|b0|
-    "packrl.ph         $t1, $t1, $t8          \n"  // |ff|R0|G0|B0|
-// Store results.
-    "sw                $t2, 0(%[rgb_buf])     \n"
-    "sw                $t0, 4(%[rgb_buf])     \n"
-    "sw                $t1, 8(%[rgb_buf])     \n"
-    "sw                $t3, 12(%[rgb_buf])    \n"
-    "bnez              %[width], 1b           \n"
-    " addiu            %[rgb_buf], 16         \n"
-   "2:                                        \n"
-    ".set pop                                 \n"
-      :[y_buf] "+r" (y_buf),
-       [u_buf] "+r" (u_buf),
-       [v_buf] "+r" (v_buf),
-       [width] "+r" (width),
-       [rgb_buf] "+r" (rgb_buf)
-      :
-      : "t0", "t1",  "t2", "t3",  "t4", "t5",
-      "t6", "t7", "t8", "t9",
-      "s0", "s1", "s2", "s3",
-      "s4", "s5", "s6"
-  );
-}
-
-void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  __asm__ __volatile__ (
-    ".set push                                \n"
-    ".set noreorder                           \n"
-    "beqz              %[width], 2f           \n"
-    " repl.ph          $s0, 74                \n"  // |YG|YG| = |74|74|
-    "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25|
-    "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52|
-    "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102|
-    "repl.ph           $s4, 16                \n"  // |0|16|0|16|
-    "repl.ph           $s5, 128               \n"  // |128|128|
-    "lui               $s6, 0xff00            \n"
-    "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|
-
-    ".p2align          2                       \n"
-   "1:                                         \n"
-      I422ToTransientMipsRGB
-// Arranging into abgr format
-    "precr.qb.ph      $t0, $t8, $t1           \n"  // |G1|g1|R1|r1|
-    "precr.qb.ph      $t3, $t9, $t2           \n"  // |G0|g0|R0|r0|
-    "precrq.qb.ph     $t8, $t0, $t3           \n"  // |G1|R1|G0|R0|
-    "precr.qb.ph      $t9, $t0, $t3           \n"  // |g1|r1|g0|r0|
-
-    "precr.qb.ph       $t2, $t4, $t5          \n"  // |B1|b1|B0|b0|
-    "addiu             %[width], -4           \n"
-    "addiu             %[y_buf], 4            \n"
-    "preceu.ph.qbla    $t1, $t2               \n"  // |0 |B1|0 |B0|
-    "preceu.ph.qbra    $t2, $t2               \n"  // |0 |b1|0 |b0|
-    "or                $t1, $t1, $s6          \n"  // |ff|B1|ff|B0|
-    "or                $t2, $t2, $s6          \n"  // |ff|b1|ff|b0|
-    "precrq.ph.w       $t0, $t2, $t9          \n"  // |ff|b1|g1|r1|
-    "precrq.ph.w       $t3, $t1, $t8          \n"  // |ff|B1|G1|R1|
-    "sll               $t9, $t9, 16           \n"
-    "sll               $t8, $t8, 16           \n"
-    "packrl.ph         $t2, $t2, $t9          \n"  // |ff|b0|g0|r0|
-    "packrl.ph         $t1, $t1, $t8          \n"  // |ff|B0|G0|R0|
-// Store results.
-    "sw                $t2, 0(%[rgb_buf])     \n"
-    "sw                $t0, 4(%[rgb_buf])     \n"
-    "sw                $t1, 8(%[rgb_buf])     \n"
-    "sw                $t3, 12(%[rgb_buf])    \n"
-    "bnez              %[width], 1b           \n"
-    " addiu            %[rgb_buf], 16         \n"
-   "2:                                        \n"
-    ".set pop                                 \n"
-      :[y_buf] "+r" (y_buf),
-       [u_buf] "+r" (u_buf),
-       [v_buf] "+r" (v_buf),
-       [width] "+r" (width),
-       [rgb_buf] "+r" (rgb_buf)
-      :
-      : "t0", "t1",  "t2", "t3",  "t4", "t5",
-      "t6", "t7", "t8", "t9",
-      "s0", "s1", "s2", "s3",
-      "s4", "s5", "s6"
-  );
-}
-
-void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  __asm__ __volatile__ (
-    ".set push                                \n"
-    ".set noreorder                           \n"
-    "beqz              %[width], 2f           \n"
-    " repl.ph          $s0, 74                \n"  // |YG|YG| = |74 |74 |
-    "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25|
-    "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52|
-    "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102|
-    "repl.ph           $s4, 16                \n"  // |0|16|0|16|
-    "repl.ph           $s5, 128               \n"  // |128|128|
-    "lui               $s6, 0xff              \n"
-    "ori               $s6, 0xff              \n"  // |00|ff|00|ff|
-
-    ".p2align          2                      \n"
-   "1:                                        \n"
-      I422ToTransientMipsRGB
-      // Arranging into bgra format
-    "precr.qb.ph       $t4, $t4, $t8          \n"  // |B1|b1|G1|g1|
-    "precr.qb.ph       $t5, $t5, $t9          \n"  // |B0|b0|G0|g0|
-    "precrq.qb.ph      $t8, $t4, $t5          \n"  // |B1|G1|B0|G0|
-    "precr.qb.ph       $t9, $t4, $t5          \n"  // |b1|g1|b0|g0|
-
-    "precr.qb.ph       $t2, $t1, $t2          \n"  // |R1|r1|R0|r0|
-    "addiu             %[width], -4           \n"
-    "addiu             %[y_buf], 4            \n"
-    "preceu.ph.qbla    $t1, $t2               \n"  // |0 |R1|0 |R0|
-    "preceu.ph.qbra    $t2, $t2               \n"  // |0 |r1|0 |r0|
-    "sll               $t1, $t1, 8            \n"  // |R1|0 |R0|0 |
-    "sll               $t2, $t2, 8            \n"  // |r1|0 |r0|0 |
-    "or                $t1, $t1, $s6          \n"  // |R1|ff|R0|ff|
-    "or                $t2, $t2, $s6          \n"  // |r1|ff|r0|ff|
-    "precrq.ph.w       $t0, $t9, $t2          \n"  // |b1|g1|r1|ff|
-    "precrq.ph.w       $t3, $t8, $t1          \n"  // |B1|G1|R1|ff|
-    "sll               $t1, $t1, 16           \n"
-    "sll               $t2, $t2, 16           \n"
-    "packrl.ph         $t2, $t9, $t2          \n"  // |b0|g0|r0|ff|
-    "packrl.ph         $t1, $t8, $t1          \n"  // |B0|G0|R0|ff|
-// Store results.
-    "sw                $t2, 0(%[rgb_buf])     \n"
-    "sw                $t0, 4(%[rgb_buf])     \n"
-    "sw                $t1, 8(%[rgb_buf])     \n"
-    "sw                $t3, 12(%[rgb_buf])    \n"
-    "bnez              %[width], 1b           \n"
-    " addiu            %[rgb_buf], 16         \n"
-   "2:                                        \n"
-    ".set pop                                 \n"
-      :[y_buf] "+r" (y_buf),
-       [u_buf] "+r" (u_buf),
-       [v_buf] "+r" (v_buf),
-       [width] "+r" (width),
-       [rgb_buf] "+r" (rgb_buf)
-      :
-      : "t0", "t1",  "t2", "t3",  "t4", "t5",
-      "t6", "t7", "t8", "t9",
-      "s0", "s1", "s2", "s3",
-      "s4", "s5", "s6"
-  );
-}
-
-// Bilinear filter 8x2 -> 8x1
-void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
-                                ptrdiff_t src_stride, int dst_width,
-                                int source_y_fraction) {
-    int y0_fraction = 256 - source_y_fraction;
-    const uint8* src_ptr1 = src_ptr + src_stride;
-
-  __asm__ __volatile__ (
-     ".set push                                           \n"
-     ".set noreorder                                      \n"
-
-     "replv.ph          $t0, %[y0_fraction]               \n"
-     "replv.ph          $t1, %[source_y_fraction]         \n"
-
-    ".p2align           2                                 \n"
-   "1:                                                    \n"
-     "lw                $t2, 0(%[src_ptr])                \n"
-     "lw                $t3, 0(%[src_ptr1])               \n"
-     "lw                $t4, 4(%[src_ptr])                \n"
-     "lw                $t5, 4(%[src_ptr1])               \n"
-     "muleu_s.ph.qbl    $t6, $t2, $t0                     \n"
-     "muleu_s.ph.qbr    $t7, $t2, $t0                     \n"
-     "muleu_s.ph.qbl    $t8, $t3, $t1                     \n"
-     "muleu_s.ph.qbr    $t9, $t3, $t1                     \n"
-     "muleu_s.ph.qbl    $t2, $t4, $t0                     \n"
-     "muleu_s.ph.qbr    $t3, $t4, $t0                     \n"
-     "muleu_s.ph.qbl    $t4, $t5, $t1                     \n"
-     "muleu_s.ph.qbr    $t5, $t5, $t1                     \n"
-     "addq.ph           $t6, $t6, $t8                     \n"
-     "addq.ph           $t7, $t7, $t9                     \n"
-     "addq.ph           $t2, $t2, $t4                     \n"
-     "addq.ph           $t3, $t3, $t5                     \n"
-     "shra.ph           $t6, $t6, 8                       \n"
-     "shra.ph           $t7, $t7, 8                       \n"
-     "shra.ph           $t2, $t2, 8                       \n"
-     "shra.ph           $t3, $t3, 8                       \n"
-     "precr.qb.ph       $t6, $t6, $t7                     \n"
-     "precr.qb.ph       $t2, $t2, $t3                     \n"
-     "addiu             %[src_ptr], %[src_ptr], 8         \n"
-     "addiu             %[src_ptr1], %[src_ptr1], 8       \n"
-     "addiu             %[dst_width], %[dst_width], -8    \n"
-     "sw                $t6, 0(%[dst_ptr])                \n"
-     "sw                $t2, 4(%[dst_ptr])                \n"
-     "bgtz              %[dst_width], 1b                  \n"
-     " addiu            %[dst_ptr], %[dst_ptr], 8         \n"
-
-     ".set pop                                            \n"
-  : [dst_ptr] "+r" (dst_ptr),
-    [src_ptr1] "+r" (src_ptr1),
-    [src_ptr] "+r" (src_ptr),
-    [dst_width] "+r" (dst_width)
-  : [source_y_fraction] "r" (source_y_fraction),
-    [y0_fraction] "r" (y0_fraction),
-    [src_stride] "r" (src_stride)
-  : "t0", "t1", "t2", "t3", "t4", "t5",
-    "t6", "t7", "t8", "t9"
-  );
-}
-#endif  // __mips_dsp_rev >= 2
-
-#endif  // defined(__mips__)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc
deleted file mode 100755
index 68e380051b..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc
+++ /dev/null
@@ -1,2847 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-// Read 8 Y, 4 U and 4 V from 422
-#define READYUV422                                                             \
-    "vld1.8     {d0}, [%0]!                    \n"                             \
-    "vld1.32    {d2[0]}, [%1]!                 \n"                             \
-    "vld1.32    {d2[1]}, [%2]!                 \n"
-
-// Read 8 Y, 2 U and 2 V from 422
-#define READYUV411                                                             \
-    "vld1.8     {d0}, [%0]!                    \n"                             \
-    "vld1.16    {d2[0]}, [%1]!                 \n"                             \
-    "vld1.16    {d2[1]}, [%2]!                 \n"                             \
-    "vmov.u8    d3, d2                         \n"                             \
-    "vzip.u8    d2, d3                         \n"
-
-// Read 8 Y, 8 U and 8 V from 444
-#define READYUV444                                                             \
-    "vld1.8     {d0}, [%0]!                    \n"                             \
-    "vld1.8     {d2}, [%1]!                    \n"                             \
-    "vld1.8     {d3}, [%2]!                    \n"                             \
-    "vpaddl.u8  q1, q1                         \n"                             \
-    "vrshrn.u16 d2, q1, #1                     \n"
-
-// Read 8 Y, and set 4 U and 4 V to 128
-#define READYUV400                                                             \
-    "vld1.8     {d0}, [%0]!                    \n"                             \
-    "vmov.u8    d2, #128                       \n"
-
-// Read 8 Y and 4 UV from NV12
-#define READNV12                                                               \
-    "vld1.8     {d0}, [%0]!                    \n"                             \
-    "vld1.8     {d2}, [%1]!                    \n"                             \
-    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
-    "vuzp.u8    d2, d3                         \n"                             \
-    "vtrn.u32   d2, d3                         \n"
-
-// Read 8 Y and 4 VU from NV21
-#define READNV21                                                               \
-    "vld1.8     {d0}, [%0]!                    \n"                             \
-    "vld1.8     {d2}, [%1]!                    \n"                             \
-    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
-    "vuzp.u8    d3, d2                         \n"                             \
-    "vtrn.u32   d2, d3                         \n"
-
-// Read 8 YUY2
-#define READYUY2                                                               \
-    "vld2.8     {d0, d2}, [%0]!                \n"                             \
-    "vmov.u8    d3, d2                         \n"                             \
-    "vuzp.u8    d2, d3                         \n"                             \
-    "vtrn.u32   d2, d3                         \n"
-
-// Read 8 UYVY
-#define READUYVY                                                               \
-    "vld2.8     {d2, d3}, [%0]!                \n"                             \
-    "vmov.u8    d0, d3                         \n"                             \
-    "vmov.u8    d3, d2                         \n"                             \
-    "vuzp.u8    d2, d3                         \n"                             \
-    "vtrn.u32   d2, d3                         \n"
-
-#define YUV422TORGB                                                            \
-    "veor.u8    d2, d26                        \n"/*subtract 128 from u and v*/\
-    "vmull.s8   q8, d2, d24                    \n"/*  u/v B/R component      */\
-    "vmull.s8   q9, d2, d25                    \n"/*  u/v G component        */\
-    "vmov.u8    d1, #0                         \n"/*  split odd/even y apart */\
-    "vtrn.u8    d0, d1                         \n"                             \
-    "vsub.s16   q0, q0, q15                    \n"/*  offset y               */\
-    "vmul.s16   q0, q0, q14                    \n"                             \
-    "vadd.s16   d18, d19                       \n"                             \
-    "vqadd.s16  d20, d0, d16                   \n" /* B */                     \
-    "vqadd.s16  d21, d1, d16                   \n"                             \
-    "vqadd.s16  d22, d0, d17                   \n" /* R */                     \
-    "vqadd.s16  d23, d1, d17                   \n"                             \
-    "vqadd.s16  d16, d0, d18                   \n" /* G */                     \
-    "vqadd.s16  d17, d1, d18                   \n"                             \
-    "vqshrun.s16 d0, q10, #6                   \n" /* B */                     \
-    "vqshrun.s16 d1, q11, #6                   \n" /* G */                     \
-    "vqshrun.s16 d2, q8, #6                    \n" /* R */                     \
-    "vmovl.u8   q10, d0                        \n"/*  set up for reinterleave*/\
-    "vmovl.u8   q11, d1                        \n"                             \
-    "vmovl.u8   q8, d2                         \n"                             \
-    "vtrn.u8    d20, d21                       \n"                             \
-    "vtrn.u8    d22, d23                       \n"                             \
-    "vtrn.u8    d16, d17                       \n"                             \
-    "vmov.u8    d21, d16                       \n"
-
-static vec8 kUVToRB  = { 127, 127, 127, 127, 102, 102, 102, 102,
-                         0, 0, 0, 0, 0, 0, 0, 0 };
-static vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
-                       0, 0, 0, 0, 0, 0, 0, 0 };
-
-void I444ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-#ifdef _ANDROID
-				".fpu neon\n"
-#endif
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV444
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_u),     // %1
-      "+r"(src_v),     // %2
-      "+r"(dst_argb),  // %3
-      "+r"(width)      // %4
-    : "r"(&kUVToRB),   // %5
-      "r"(&kUVToG)     // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I422ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_u),     // %1
-      "+r"(src_v),     // %2
-      "+r"(dst_argb),  // %3
-      "+r"(width)      // %4
-    : "r"(&kUVToRB),   // %5
-      "r"(&kUVToG)     // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I411ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV411
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_u),     // %1
-      "+r"(src_v),     // %2
-      "+r"(dst_argb),  // %3
-      "+r"(width)      // %4
-    : "r"(&kUVToRB),   // %5
-      "r"(&kUVToG)     // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I422ToBGRARow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_bgra,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vswp.u8    d20, d22                       \n"
-    "vmov.u8    d19, #255                      \n"
-    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_u),     // %1
-      "+r"(src_v),     // %2
-      "+r"(dst_bgra),  // %3
-      "+r"(width)      // %4
-    : "r"(&kUVToRB),   // %5
-      "r"(&kUVToG)     // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I422ToABGRRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_abgr,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vswp.u8    d20, d22                       \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_u),     // %1
-      "+r"(src_v),     // %2
-      "+r"(dst_abgr),  // %3
-      "+r"(width)      // %4
-    : "r"(&kUVToRB),   // %5
-      "r"(&kUVToG)     // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I422ToRGBARow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_rgba,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vmov.u8    d19, #255                      \n"
-    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_u),     // %1
-      "+r"(src_v),     // %2
-      "+r"(dst_rgba),  // %3
-      "+r"(width)      // %4
-    : "r"(&kUVToRB),   // %5
-      "r"(&kUVToG)     // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I422ToRGB24Row_NEON(const uint8* src_y,
-                         const uint8* src_u,
-                         const uint8* src_v,
-                         uint8* dst_rgb24,
-                         int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vst3.8     {d20, d21, d22}, [%3]!         \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),      // %0
-      "+r"(src_u),      // %1
-      "+r"(src_v),      // %2
-      "+r"(dst_rgb24),  // %3
-      "+r"(width)       // %4
-    : "r"(&kUVToRB),    // %5
-      "r"(&kUVToG)      // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I422ToRAWRow_NEON(const uint8* src_y,
-                       const uint8* src_u,
-                       const uint8* src_v,
-                       uint8* dst_raw,
-                       int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vswp.u8    d20, d22                       \n"
-    "vst3.8     {d20, d21, d22}, [%3]!         \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),    // %0
-      "+r"(src_u),    // %1
-      "+r"(src_v),    // %2
-      "+r"(dst_raw),  // %3
-      "+r"(width)     // %4
-    : "r"(&kUVToRB),  // %5
-      "r"(&kUVToG)    // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-#define ARGBTORGB565                                                           \
-    "vshr.u8    d20, d20, #3                   \n"  /* B                    */ \
-    "vshr.u8    d21, d21, #2                   \n"  /* G                    */ \
-    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
-    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
-    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
-    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
-    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
-    "vshl.u16   q10, q10, #11                  \n"  /* R                    */ \
-    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
-    "vorr       q0, q0, q10                    \n"  /* BGR                  */
-
-void I422ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_u,
-                          const uint8* src_v,
-                          uint8* dst_rgb565,
-                          int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    ARGBTORGB565
-    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels RGB565.
-    "bgt        1b                             \n"
-    : "+r"(src_y),    // %0
-      "+r"(src_u),    // %1
-      "+r"(src_v),    // %2
-      "+r"(dst_rgb565),  // %3
-      "+r"(width)     // %4
-    : "r"(&kUVToRB),  // %5
-      "r"(&kUVToG)    // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-#define ARGBTOARGB1555                                                         \
-    "vshr.u8    q10, q10, #3                   \n"  /* B                    */ \
-    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
-    "vshr.u8    d23, d23, #7                   \n"  /* A                    */ \
-    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
-    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
-    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
-    "vmovl.u8   q11, d23                       \n"  /* A                    */ \
-    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
-    "vshl.u16   q10, q10, #10                  \n"  /* R                    */ \
-    "vshl.u16   q11, q11, #15                  \n"  /* A                    */ \
-    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
-    "vorr       q1, q10, q11                   \n"  /* RA                   */ \
-    "vorr       q0, q0, q1                     \n"  /* BGRA                 */
-
-void I422ToARGB1555Row_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb1555,
-                            int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    ARGBTOARGB1555
-    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB1555.
-    "bgt        1b                             \n"
-    : "+r"(src_y),    // %0
-      "+r"(src_u),    // %1
-      "+r"(src_v),    // %2
-      "+r"(dst_argb1555),  // %3
-      "+r"(width)     // %4
-    : "r"(&kUVToRB),  // %5
-      "r"(&kUVToG)    // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-#define ARGBTOARGB4444                                                         \
-    "vshr.u8    d20, d20, #4                   \n"  /* B                    */ \
-    "vbic.32    d21, d21, d4                   \n"  /* G                    */ \
-    "vshr.u8    d22, d22, #4                   \n"  /* R                    */ \
-    "vbic.32    d23, d23, d4                   \n"  /* A                    */ \
-    "vorr       d0, d20, d21                   \n"  /* BG                   */ \
-    "vorr       d1, d22, d23                   \n"  /* RA                   */ \
-    "vzip.u8    d0, d1                         \n"  /* BGRA                 */
-
-void I422ToARGB4444Row_NEON(const uint8* src_y,
-                            const uint8* src_u,
-                            const uint8* src_v,
-                            uint8* dst_argb4444,
-                            int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%5]                    \n"
-    "vld1.8     {d25}, [%6]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV422
-    YUV422TORGB
-    "subs       %4, %4, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    ARGBTOARGB4444
-    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB4444.
-    "bgt        1b                             \n"
-    : "+r"(src_y),    // %0
-      "+r"(src_u),    // %1
-      "+r"(src_v),    // %2
-      "+r"(dst_argb4444),  // %3
-      "+r"(width)     // %4
-    : "r"(&kUVToRB),  // %5
-      "r"(&kUVToG)    // %6
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void YToARGBRow_NEON(const uint8* src_y,
-                     uint8* dst_argb,
-                     int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%3]                    \n"
-    "vld1.8     {d25}, [%4]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUV400
-    YUV422TORGB
-    "subs       %2, %2, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(dst_argb),  // %1
-      "+r"(width)      // %2
-    : "r"(&kUVToRB),   // %3
-      "r"(&kUVToG)     // %4
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void I400ToARGBRow_NEON(const uint8* src_y,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    ".p2align   2                              \n"
-    "vmov.u8    d23, #255                      \n"
-  "1:                                          \n"
-    "vld1.8     {d20}, [%0]!                   \n"
-    "vmov       d21, d20                       \n"
-    "vmov       d22, d20                       \n"
-    "subs       %2, %2, #8                     \n"
-    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(dst_argb),  // %1
-      "+r"(width)      // %2
-    :
-    : "cc", "memory", "d20", "d21", "d22", "d23"
-  );
-}
-
-void NV12ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_uv,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%4]                    \n"
-    "vld1.8     {d25}, [%5]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READNV12
-    YUV422TORGB
-    "subs       %3, %3, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_argb),  // %2
-      "+r"(width)      // %3
-    : "r"(&kUVToRB),   // %4
-      "r"(&kUVToG)     // %5
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void NV21ToARGBRow_NEON(const uint8* src_y,
-                        const uint8* src_uv,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%4]                    \n"
-    "vld1.8     {d25}, [%5]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READNV21
-    YUV422TORGB
-    "subs       %3, %3, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_argb),  // %2
-      "+r"(width)      // %3
-    : "r"(&kUVToRB),   // %4
-      "r"(&kUVToG)     // %5
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void NV12ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_uv,
-                          uint8* dst_rgb565,
-                          int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%4]                    \n"
-    "vld1.8     {d25}, [%5]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READNV12
-    YUV422TORGB
-    "subs       %3, %3, #8                     \n"
-    ARGBTORGB565
-    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_rgb565),  // %2
-      "+r"(width)      // %3
-    : "r"(&kUVToRB),   // %4
-      "r"(&kUVToG)     // %5
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void NV21ToRGB565Row_NEON(const uint8* src_y,
-                          const uint8* src_uv,
-                          uint8* dst_rgb565,
-                          int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%4]                    \n"
-    "vld1.8     {d25}, [%5]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READNV21
-    YUV422TORGB
-    "subs       %3, %3, #8                     \n"
-    ARGBTORGB565
-    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
-    "bgt        1b                             \n"
-    : "+r"(src_y),     // %0
-      "+r"(src_uv),    // %1
-      "+r"(dst_rgb565),  // %2
-      "+r"(width)      // %3
-    : "r"(&kUVToRB),   // %4
-      "r"(&kUVToG)     // %5
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%3]                    \n"
-    "vld1.8     {d25}, [%4]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READYUY2
-    YUV422TORGB
-    "subs       %2, %2, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_yuy2),  // %0
-      "+r"(dst_argb),  // %1
-      "+r"(width)      // %2
-    : "r"(&kUVToRB),   // %3
-      "r"(&kUVToG)     // %4
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void UYVYToARGBRow_NEON(const uint8* src_uyvy,
-                        uint8* dst_argb,
-                        int width) {
-  asm volatile (
-    "vld1.8     {d24}, [%3]                    \n"
-    "vld1.8     {d25}, [%4]                    \n"
-    "vmov.u8    d26, #128                      \n"
-    "vmov.u16   q14, #74                       \n"
-    "vmov.u16   q15, #16                       \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    READUYVY
-    YUV422TORGB
-    "subs       %2, %2, #8                     \n"
-    "vmov.u8    d23, #255                      \n"
-    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
-    "bgt        1b                             \n"
-    : "+r"(src_uyvy),  // %0
-      "+r"(dst_argb),  // %1
-      "+r"(width)      // %2
-    : "r"(&kUVToRB),   // %3
-      "r"(&kUVToG)     // %4
-    : "cc", "memory", "q0", "q1", "q2", "q3",
-      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
-void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                     int width) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pairs of UV
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop
-    "vst1.8     {q0}, [%1]!                    \n"  // store U
-    "vst1.8     {q1}, [%2]!                    \n"  // store V
-    "bgt        1b                             \n"
-    : "+r"(src_uv),  // %0
-      "+r"(dst_u),   // %1
-      "+r"(dst_v),   // %2
-      "+r"(width)    // %3  // Output registers
-    :                       // Input registers
-    : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-// Reads 16 U's and V's and writes out 16 pairs of UV.
-void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                     int width) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load U
-    "vld1.8     {q1}, [%1]!                    \n"  // load V
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop
-    "vst2.u8    {q0, q1}, [%2]!                \n"  // store 16 pairs of UV
-    "bgt        1b                             \n"
-    :
-      "+r"(src_u),   // %0
-      "+r"(src_v),   // %1
-      "+r"(dst_uv),  // %2
-      "+r"(width)    // %3  // Output registers
-    :                       // Input registers
-    : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-// Copy multiple of 32.  vld4.8  allow unaligned and is fastest on a15.
-void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 32
-    "subs       %2, %2, #32                    \n"  // 32 processed per loop
-    "vst1.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 32
-    "bgt        1b                             \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(count)  // %2  // Output registers
-  :                     // Input registers
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-// SetRow8 writes 'count' bytes using a 32 bit value repeated.
-void SetRow_NEON(uint8* dst, uint32 v32, int count) {
-  asm volatile (
-    "vdup.u32  q0, %2                          \n"  // duplicate 4 ints
-    "1:                                        \n"
-    "subs      %1, %1, #16                     \n"  // 16 bytes per loop
-    "vst1.8    {q0}, [%0]!                     \n"  // store
-    "bgt       1b                              \n"
-  : "+r"(dst),   // %0
-    "+r"(count)  // %1
-  : "r"(v32)     // %2
-  : "cc", "memory", "q0"
-  );
-}
-
-// TODO(fbarchard): Make fully assembler
-// SetRow32 writes 'count' words using a 32 bit value repeated.
-void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
-                      int dst_stride, int height) {
-  for (int y = 0; y < height; ++y) {
-    SetRow_NEON(dst, v32, width << 2);
-    dst += dst_stride;
-  }
-}
-
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
-  asm volatile (
-    // Start at end of source row.
-    "mov        r3, #-16                       \n"
-    "add        %0, %0, %2                     \n"
-    "sub        %0, #16                        \n"
-
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
-    "subs       %2, #16                        \n"  // 16 pixels per loop.
-    "vrev64.8   q0, q0                         \n"
-    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
-    "vst1.8     {d0}, [%1]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(width)  // %2
-  :
-  : "cc", "memory", "r3", "q0"
-  );
-}
-
-void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                      int width) {
-  asm volatile (
-    // Start at end of source row.
-    "mov        r12, #-16                      \n"
-    "add        %0, %0, %3, lsl #1             \n"
-    "sub        %0, #16                        \n"
-
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld2.8     {d0, d1}, [%0], r12            \n"  // src -= 16
-    "subs       %3, #8                         \n"  // 8 pixels per loop.
-    "vrev64.8   q0, q0                         \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // dst += 8
-    "vst1.8     {d1}, [%2]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_uv),  // %0
-    "+r"(dst_u),   // %1
-    "+r"(dst_v),   // %2
-    "+r"(width)    // %3
-  :
-  : "cc", "memory", "r12", "q0"
-  );
-}
-
-void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
-  asm volatile (
-    // Start at end of source row.
-    "mov        r3, #-16                       \n"
-    "add        %0, %0, %2, lsl #2             \n"
-    "sub        %0, #16                        \n"
-
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
-    "subs       %2, #4                         \n"  // 4 pixels per loop.
-    "vrev64.32  q0, q0                         \n"
-    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
-    "vst1.8     {d0}, [%1]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(width)  // %2
-  :
-  : "cc", "memory", "r3", "q0"
-  );
-}
-
-void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
-  asm volatile (
-    "vmov.u8    d4, #255                       \n"  // Alpha
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RGB24.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(src_rgb24),  // %0
-    "+r"(dst_argb),   // %1
-    "+r"(pix)         // %2
-  :
-  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
-  );
-}
-
-void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
-  asm volatile (
-    "vmov.u8    d4, #255                       \n"  // Alpha
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RAW.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vswp.u8    d1, d3                         \n"  // swap R, B
-    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(src_raw),   // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
-  );
-}
-
-#define RGB565TOARGB                                                           \
-    "vshrn.u16  d6, q0, #5                     \n"  /* G xxGGGGGG           */ \
-    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB RRRRRxxx */ \
-    "vshl.u8    d6, d6, #2                     \n"  /* G GGGGGG00 upper 6   */ \
-    "vshr.u8    d1, d1, #3                     \n"  /* R 000RRRRR lower 5   */ \
-    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
-    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
-    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
-    "vshr.u8    d4, d6, #6                     \n"  /* G 000000GG lower 2   */ \
-    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
-    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
-
-void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
-  asm volatile (
-    "vmov.u8    d3, #255                       \n"  // Alpha
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    RGB565TOARGB
-    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(src_rgb565),  // %0
-    "+r"(dst_argb),    // %1
-    "+r"(pix)          // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
-  );
-}
-
-#define ARGB1555TOARGB                                                         \
-    "vshrn.u16  d7, q0, #8                     \n"  /* A Arrrrrxx           */ \
-    "vshr.u8    d6, d7, #2                     \n"  /* R xxxRRRRR           */ \
-    "vshrn.u16  d5, q0, #5                     \n"  /* G xxxGGGGG           */ \
-    "vmovn.u16  d4, q0                         \n"  /* B xxxBBBBB           */ \
-    "vshr.u8    d7, d7, #7                     \n"  /* A 0000000A           */ \
-    "vneg.s8    d7, d7                         \n"  /* A AAAAAAAA upper 8   */ \
-    "vshl.u8    d6, d6, #3                     \n"  /* R RRRRR000 upper 5   */ \
-    "vshr.u8    q1, q3, #5                     \n"  /* R,A 00000RRR lower 3 */ \
-    "vshl.u8    q0, q2, #3                     \n"  /* B,G BBBBB000 upper 5 */ \
-    "vshr.u8    q2, q0, #5                     \n"  /* B,G 00000BBB lower 3 */ \
-    "vorr.u8    q1, q1, q3                     \n"  /* R,A                  */ \
-    "vorr.u8    q0, q0, q2                     \n"  /* B,G                  */ \
-
-// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
-#define RGB555TOARGB                                                           \
-    "vshrn.u16  d6, q0, #5                     \n"  /* G xxxGGGGG           */ \
-    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB xRRRRRxx */ \
-    "vshl.u8    d6, d6, #3                     \n"  /* G GGGGG000 upper 5   */ \
-    "vshr.u8    d1, d1, #2                     \n"  /* R 00xRRRRR lower 5   */ \
-    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
-    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
-    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
-    "vshr.u8    d4, d6, #5                     \n"  /* G 00000GGG lower 3   */ \
-    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
-    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
-
-void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
-                            int pix) {
-  asm volatile (
-    "vmov.u8    d3, #255                       \n"  // Alpha
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    ARGB1555TOARGB
-    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(src_argb1555),  // %0
-    "+r"(dst_argb),    // %1
-    "+r"(pix)          // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
-  );
-}
-
-#define ARGB4444TOARGB                                                         \
-    "vuzp.u8    d0, d1                         \n"  /* d0 BG, d1 RA         */ \
-    "vshl.u8    q2, q0, #4                     \n"  /* B,R BBBB0000         */ \
-    "vshr.u8    q1, q0, #4                     \n"  /* G,A 0000GGGG         */ \
-    "vshr.u8    q0, q2, #4                     \n"  /* B,R 0000BBBB         */ \
-    "vorr.u8    q0, q0, q2                     \n"  /* B,R BBBBBBBB         */ \
-    "vshl.u8    q2, q1, #4                     \n"  /* G,A GGGG0000         */ \
-    "vorr.u8    q1, q1, q2                     \n"  /* G,A GGGGGGGG         */ \
-    "vswp.u8    d1, d2                         \n"  /* B,R,G,A -> B,G,R,A   */
-
-void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
-                            int pix) {
-  asm volatile (
-    "vmov.u8    d3, #255                       \n"  // Alpha
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    ARGB4444TOARGB
-    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(src_argb4444),  // %0
-    "+r"(dst_argb),    // %1
-    "+r"(pix)          // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
-  );
-}
-
-void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RGB24.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),   // %0
-    "+r"(dst_rgb24),  // %1
-    "+r"(pix)         // %2
-  :
-  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
-  );
-}
-
-void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vswp.u8    d1, d3                         \n"  // swap R, B
-    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RAW.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_raw),   // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
-  );
-}
-
-void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of YUY2.
-    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
-    "vst1.8     {q0}, [%1]!                    \n"  // store 16 pixels of Y.
-    "bgt        1b                             \n"
-  : "+r"(src_yuy2),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of UYVY.
-    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
-    "vst1.8     {q1}, [%1]!                    \n"  // store 16 pixels of Y.
-    "bgt        1b                             \n"
-  : "+r"(src_uyvy),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
-                         int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
-    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
-    "vst1.8     {d1}, [%1]!                    \n"  // store 8 U.
-    "vst1.8     {d3}, [%2]!                    \n"  // store 8 V.
-    "bgt        1b                             \n"
-  : "+r"(src_yuy2),  // %0
-    "+r"(dst_u),     // %1
-    "+r"(dst_v),     // %2
-    "+r"(pix)        // %3
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
-  );
-}
-
-void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
-                         int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
-    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 U.
-    "vst1.8     {d2}, [%2]!                    \n"  // store 8 V.
-    "bgt        1b                             \n"
-  : "+r"(src_uyvy),  // %0
-    "+r"(dst_u),     // %1
-    "+r"(dst_v),     // %2
-    "+r"(pix)        // %3
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
-  );
-}
-
-void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // stride + src_yuy2
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
-    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
-    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row YUY2.
-    "vrhadd.u8  d1, d1, d5                     \n"  // average rows of U
-    "vrhadd.u8  d3, d3, d7                     \n"  // average rows of V
-    "vst1.8     {d1}, [%2]!                    \n"  // store 8 U.
-    "vst1.8     {d3}, [%3]!                    \n"  // store 8 V.
-    "bgt        1b                             \n"
-  : "+r"(src_yuy2),     // %0
-    "+r"(stride_yuy2),  // %1
-    "+r"(dst_u),        // %2
-    "+r"(dst_v),        // %3
-    "+r"(pix)           // %4
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
-  );
-}
-
-void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // stride + src_uyvy
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
-    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
-    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row UYVY.
-    "vrhadd.u8  d0, d0, d4                     \n"  // average rows of U
-    "vrhadd.u8  d2, d2, d6                     \n"  // average rows of V
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 U.
-    "vst1.8     {d2}, [%3]!                    \n"  // store 8 V.
-    "bgt        1b                             \n"
-  : "+r"(src_uyvy),     // %0
-    "+r"(stride_uyvy),  // %1
-    "+r"(dst_u),        // %2
-    "+r"(dst_v),        // %3
-    "+r"(pix)           // %4
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
-  );
-}
-
-void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  asm volatile (
-    // change the stride to row 2 pointer
-    "add        %1, %0                         \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load row 1 16 pixels.
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop
-    "vld1.8     {q1}, [%1]!                    \n"  // load row 2 16 pixels.
-    "vrhadd.u8  q0, q1                         \n"  // average row 1 and 2
-    "vst1.8     {q0}, [%2]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_uv),         // %0
-    "+r"(src_uv_stride),  // %1
-    "+r"(dst_uv),         // %2
-    "+r"(pix)             // %3
-  :
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
-void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
-                         uint32 selector, int pix) {
-  asm volatile (
-    "vmov.u32   d6[0], %3                      \n"  // selector
-  "1:                                          \n"
-    "vld1.8     {q0, q1}, [%0]!                \n"  // load row 8 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop
-    "vtbl.8     d4, {d0, d1}, d6               \n"  // look up 4 pixels
-    "vtbl.8     d5, {d2, d3}, d6               \n"  // look up 4 pixels
-    "vtrn.u32   d4, d5                         \n"  // combine 8 pixels
-    "vst1.8     {d4}, [%1]!                    \n"  // store 8.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),   // %0
-    "+r"(dst_bayer),  // %1
-    "+r"(pix)         // %2
-  : "r"(selector)     // %3
-  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
-  );
-}
-
-// Select G channels from ARGB.  e.g.  GGGGGGGG
-void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
-                           uint32 /*selector*/, int pix) {
-  asm volatile (
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load row 8 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop
-    "vst1.8     {d1}, [%1]!                    \n"  // store 8 G's.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),   // %0
-    "+r"(dst_bayer),  // %1
-    "+r"(pix)         // %2
-  :
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix) {
-  asm volatile (
-    "vld1.8     {q2}, [%3]                     \n"  // shuffler
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 4 pixels.
-    "subs       %2, %2, #4                     \n"  // 4 processed per loop
-    "vtbl.8     d2, {d0, d1}, d4               \n"  // look up 2 first pixels
-    "vtbl.8     d3, {d0, d1}, d5               \n"  // look up 2 next pixels
-    "vst1.8     {q1}, [%1]!                    \n"  // store 4.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  : "r"(shuffler)    // %3
-  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
-  );
-}
-
-void I422ToYUY2Row_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_yuy2, int width) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld2.8     {d0, d2}, [%0]!                \n"  // load 16 Ys
-    "vld1.8     {d1}, [%1]!                    \n"  // load 8 Us
-    "vld1.8     {d3}, [%2]!                    \n"  // load 8 Vs
-    "subs       %4, %4, #16                    \n"  // 16 pixels
-    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 YUY2/16 pixels.
-    "bgt        1b                             \n"
-  : "+r"(src_y),     // %0
-    "+r"(src_u),     // %1
-    "+r"(src_v),     // %2
-    "+r"(dst_yuy2),  // %3
-    "+r"(width)      // %4
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3"
-  );
-}
-
-void I422ToUYVYRow_NEON(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_uyvy, int width) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld2.8     {d1, d3}, [%0]!                \n"  // load 16 Ys
-    "vld1.8     {d0}, [%1]!                    \n"  // load 8 Us
-    "vld1.8     {d2}, [%2]!                    \n"  // load 8 Vs
-    "subs       %4, %4, #16                    \n"  // 16 pixels
-    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 UYVY/16 pixels.
-    "bgt        1b                             \n"
-  : "+r"(src_y),     // %0
-    "+r"(src_u),     // %1
-    "+r"(src_v),     // %2
-    "+r"(dst_uyvy),  // %3
-    "+r"(width)      // %4
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3"
-  );
-}
-
-void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    ARGBTORGB565
-    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels RGB565.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_rgb565),  // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
-  );
-}
-
-void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
-                            int pix) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    ARGBTOARGB1555
-    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB1555.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb1555),  // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
-  );
-}
-
-void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
-                            int pix) {
-  asm volatile (
-    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    ARGBTOARGB4444
-    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB4444.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),      // %0
-    "+r"(dst_argb4444),  // %1
-    "+r"(pix)            // %2
-  :
-  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
-  );
-}
-
-void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
-    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
-    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
-    "vmov.u8    d27, #16                       \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q2, d0, d24                    \n"  // B
-    "vmlal.u8   q2, d1, d25                    \n"  // G
-    "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d27                        \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
-  );
-}
-
-void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
-    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
-    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q2, d0, d24                    \n"  // B
-    "vmlal.u8   q2, d1, d25                    \n"  // G
-    "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit Y
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
-  );
-}
-
-// 8x1 pixels.
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) {
-  asm volatile (
-    "vmov.u8    d24, #112                      \n"  // UB / VR 0.875 coefficient
-    "vmov.u8    d25, #74                       \n"  // UG -0.5781 coefficient
-    "vmov.u8    d26, #38                       \n"  // UR -0.2969 coefficient
-    "vmov.u8    d27, #18                       \n"  // VB -0.1406 coefficient
-    "vmov.u8    d28, #94                       \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q2, d0, d24                    \n"  // B
-    "vmlsl.u8   q2, d1, d25                    \n"  // G
-    "vmlsl.u8   q2, d2, d26                    \n"  // R
-    "vadd.u16   q2, q2, q15                    \n"  // +128 -> unsigned
-
-    "vmull.u8   q3, d2, d24                    \n"  // R
-    "vmlsl.u8   q3, d1, d28                    \n"  // G
-    "vmlsl.u8   q3, d0, d27                    \n"  // B
-    "vadd.u16   q3, q3, q15                    \n"  // +128 -> unsigned
-
-    "vqshrn.u16  d0, q2, #8                    \n"  // 16 bit to 8 bit U
-    "vqshrn.u16  d1, q3, #8                    \n"  // 16 bit to 8 bit V
-
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_u),     // %1
-    "+r"(dst_v),     // %2
-    "+r"(pix)        // %3
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
-  );
-}
-
-// 16x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
-void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) {
-  asm volatile (
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
-
-    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
-
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
-    "vmul.s16   q8, q0, q10                    \n"  // B
-    "vmls.s16   q8, q1, q11                    \n"  // G
-    "vmls.s16   q8, q2, q12                    \n"  // R
-    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
-
-    "vmul.s16   q9, q2, q10                    \n"  // R
-    "vmls.s16   q9, q1, q14                    \n"  // G
-    "vmls.s16   q9, q0, q13                    \n"  // B
-    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
-
-    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
-    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
-
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_u),     // %1
-    "+r"(dst_v),     // %2
-    "+r"(pix)        // %3
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// 32x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 32.
-void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                         int pix) {
-  asm volatile (
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
-    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
-    "vld4.8     {d8, d10, d12, d14}, [%0]!     \n"  // load 8 more ARGB pixels.
-    "vld4.8     {d9, d11, d13, d15}, [%0]!     \n"  // load last 8 ARGB pixels.
-    "vpaddl.u8  q4, q4                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q5, q5                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q6, q6                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vpadd.u16  d0, d0, d1                     \n"  // B 16 shorts -> 8 shorts.
-    "vpadd.u16  d1, d8, d9                     \n"  // B
-    "vpadd.u16  d2, d2, d3                     \n"  // G 16 shorts -> 8 shorts.
-    "vpadd.u16  d3, d10, d11                   \n"  // G
-    "vpadd.u16  d4, d4, d5                     \n"  // R 16 shorts -> 8 shorts.
-    "vpadd.u16  d5, d12, d13                   \n"  // R
-
-    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
-    "vrshr.u16  q1, q1, #1                     \n"
-    "vrshr.u16  q2, q2, #1                     \n"
-
-    "subs       %3, %3, #32                    \n"  // 32 processed per loop.
-    "vmul.s16   q8, q0, q10                    \n"  // B
-    "vmls.s16   q8, q1, q11                    \n"  // G
-    "vmls.s16   q8, q2, q12                    \n"  // R
-    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
-    "vmul.s16   q9, q2, q10                    \n"  // R
-    "vmls.s16   q9, q1, q14                    \n"  // G
-    "vmls.s16   q9, q0, q13                    \n"  // B
-    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
-    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
-    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_u),     // %1
-    "+r"(dst_v),     // %2
-    "+r"(pix)        // %3
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
-#define RGBTOUV(QB, QG, QR) \
-    "vmul.s16   q8, " #QB ", q10               \n"  /* B                    */ \
-    "vmls.s16   q8, " #QG ", q11               \n"  /* G                    */ \
-    "vmls.s16   q8, " #QR ", q12               \n"  /* R                    */ \
-    "vadd.u16   q8, q8, q15                    \n"  /* +128 -> unsigned     */ \
-    "vmul.s16   q9, " #QR ", q10               \n"  /* R                    */ \
-    "vmls.s16   q9, " #QG ", q14               \n"  /* G                    */ \
-    "vmls.s16   q9, " #QB ", q13               \n"  /* B                    */ \
-    "vadd.u16   q9, q9, q15                    \n"  /* +128 -> unsigned     */ \
-    "vqshrn.u16  d0, q8, #8                    \n"  /* 16 bit to 8 bit U    */ \
-    "vqshrn.u16  d1, q9, #8                    \n"  /* 16 bit to 8 bit V    */
-
-// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_argb
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
-    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
-    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ARGB pixels.
-    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ARGB pixels.
-    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
-    "vrshr.u16  q1, q1, #1                     \n"
-    "vrshr.u16  q2, q2, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
-    RGBTOUV(q0, q1, q2)
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(src_stride_argb),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// TODO(fbarchard): Subsample match C code.
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_argb
-    "vmov.s16   q10, #127 / 2                  \n"  // UB / VR 0.500 coefficient
-    "vmov.s16   q11, #84 / 2                   \n"  // UG -0.33126 coefficient
-    "vmov.s16   q12, #43 / 2                   \n"  // UR -0.16874 coefficient
-    "vmov.s16   q13, #20 / 2                   \n"  // VB -0.08131 coefficient
-    "vmov.s16   q14, #107 / 2                  \n"  // VG -0.41869 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
-    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
-    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ARGB pixels.
-    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ARGB pixels.
-    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
-    "vrshr.u16  q1, q1, #1                     \n"
-    "vrshr.u16  q2, q2, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
-    RGBTOUV(q0, q1, q2)
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(src_stride_argb),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_bgra
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 BGRA pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 BGRA pixels.
-    "vpaddl.u8  q3, q3                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // R 16 bytes -> 8 shorts.
-    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more BGRA pixels.
-    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 BGRA pixels.
-    "vpadal.u8  q3, q7                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q2, q6                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q5                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vrshr.u16  q1, q1, #1                     \n"  // 2x average
-    "vrshr.u16  q2, q2, #1                     \n"
-    "vrshr.u16  q3, q3, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
-    RGBTOUV(q3, q2, q1)
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_bgra),  // %0
-    "+r"(src_stride_bgra),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_abgr
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ABGR pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ABGR pixels.
-    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
-    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ABGR pixels.
-    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ABGR pixels.
-    "vpadal.u8  q2, q6                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q0, q4                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
-    "vrshr.u16  q1, q1, #1                     \n"
-    "vrshr.u16  q2, q2, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
-    RGBTOUV(q2, q1, q0)
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_abgr),  // %0
-    "+r"(src_stride_abgr),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_rgba
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 RGBA pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 RGBA pixels.
-    "vpaddl.u8  q0, q1                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q2                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q3                         \n"  // R 16 bytes -> 8 shorts.
-    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more RGBA pixels.
-    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 RGBA pixels.
-    "vpadal.u8  q0, q5                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q6                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q2, q7                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
-    "vrshr.u16  q1, q1, #1                     \n"
-    "vrshr.u16  q2, q2, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
-    RGBTOUV(q0, q1, q2)
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_rgba),  // %0
-    "+r"(src_stride_rgba),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
-                       uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_rgb24
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RGB24 pixels.
-    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RGB24 pixels.
-    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
-    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RGB24 pixels.
-    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RGB24 pixels.
-    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
-    "vrshr.u16  q1, q1, #1                     \n"
-    "vrshr.u16  q2, q2, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
-    RGBTOUV(q0, q1, q2)
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_rgb24),  // %0
-    "+r"(src_stride_rgb24),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
-                     uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_raw
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RAW pixels.
-    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RAW pixels.
-    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
-    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RAW pixels.
-    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RAW pixels.
-    "vpadal.u8  q2, q6                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q0, q4                         \n"  // R 16 bytes -> 8 shorts.
-
-    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
-    "vrshr.u16  q1, q1, #1                     \n"
-    "vrshr.u16  q2, q2, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
-    RGBTOUV(q2, q1, q0)
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_raw),  // %0
-    "+r"(src_stride_raw),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
-                        uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_argb
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
-    RGB565TOARGB
-    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
-    "vld1.8     {q0}, [%0]!                    \n"  // next 8 RGB565 pixels.
-    RGB565TOARGB
-    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
-
-    "vld1.8     {q0}, [%1]!                    \n"  // load 8 RGB565 pixels.
-    RGB565TOARGB
-    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
-    "vld1.8     {q0}, [%1]!                    \n"  // next 8 RGB565 pixels.
-    RGB565TOARGB
-    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
-
-    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
-    "vrshr.u16  q5, q5, #1                     \n"
-    "vrshr.u16  q6, q6, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
-    "vmul.s16   q8, q4, q10                    \n"  // B
-    "vmls.s16   q8, q5, q11                    \n"  // G
-    "vmls.s16   q8, q6, q12                    \n"  // R
-    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
-    "vmul.s16   q9, q6, q10                    \n"  // R
-    "vmls.s16   q9, q5, q14                    \n"  // G
-    "vmls.s16   q9, q4, q13                    \n"  // B
-    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
-    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
-    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_rgb565),  // %0
-    "+r"(src_stride_rgb565),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
-void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
-                        uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_argb
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
-    RGB555TOARGB
-    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
-    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB1555 pixels.
-    RGB555TOARGB
-    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
-
-    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB1555 pixels.
-    RGB555TOARGB
-    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
-    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB1555 pixels.
-    RGB555TOARGB
-    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
-
-    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
-    "vrshr.u16  q5, q5, #1                     \n"
-    "vrshr.u16  q6, q6, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
-    "vmul.s16   q8, q4, q10                    \n"  // B
-    "vmls.s16   q8, q5, q11                    \n"  // G
-    "vmls.s16   q8, q6, q12                    \n"  // R
-    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
-    "vmul.s16   q9, q6, q10                    \n"  // R
-    "vmls.s16   q9, q5, q14                    \n"  // G
-    "vmls.s16   q9, q4, q13                    \n"  // B
-    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
-    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
-    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_argb1555),  // %0
-    "+r"(src_stride_argb1555),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
-void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
-                          uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "add        %1, %0, %1                     \n"  // src_stride + src_argb
-    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
-    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
-    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
-    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
-    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
-    "vmov.u16   q15, #0x8080                   \n"  // 128.5
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
-    ARGB4444TOARGB
-    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
-    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB4444 pixels.
-    ARGB4444TOARGB
-    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
-
-    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB4444 pixels.
-    ARGB4444TOARGB
-    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
-    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB4444 pixels.
-    ARGB4444TOARGB
-    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
-    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
-    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
-
-    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
-    "vrshr.u16  q5, q5, #1                     \n"
-    "vrshr.u16  q6, q6, #1                     \n"
-
-    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
-    "vmul.s16   q8, q4, q10                    \n"  // B
-    "vmls.s16   q8, q5, q11                    \n"  // G
-    "vmls.s16   q8, q6, q12                    \n"  // R
-    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
-    "vmul.s16   q9, q6, q10                    \n"  // R
-    "vmls.s16   q9, q5, q14                    \n"  // G
-    "vmls.s16   q9, q4, q13                    \n"  // B
-    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
-    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
-    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
-    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
-    "bgt        1b                             \n"
-  : "+r"(src_argb4444),  // %0
-    "+r"(src_stride_argb4444),  // %1
-    "+r"(dst_u),     // %2
-    "+r"(dst_v),     // %3
-    "+r"(pix)        // %4
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
-    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
-    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
-    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
-    "vmov.u8    d27, #16                       \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    RGB565TOARGB
-    "vmull.u8   q2, d0, d24                    \n"  // B
-    "vmlal.u8   q2, d1, d25                    \n"  // G
-    "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d27                        \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_rgb565),  // %0
-    "+r"(dst_y),       // %1
-    "+r"(pix)          // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
-  );
-}
-
-void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
-    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
-    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
-    "vmov.u8    d27, #16                       \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    ARGB1555TOARGB
-    "vmull.u8   q2, d0, d24                    \n"  // B
-    "vmlal.u8   q2, d1, d25                    \n"  // G
-    "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d27                        \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_argb1555),  // %0
-    "+r"(dst_y),         // %1
-    "+r"(pix)            // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
-  );
-}
-
-void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
-    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
-    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
-    "vmov.u8    d27, #16                       \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    ARGB4444TOARGB
-    "vmull.u8   q2, d0, d24                    \n"  // B
-    "vmlal.u8   q2, d1, d25                    \n"  // G
-    "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d27                        \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_argb4444),  // %0
-    "+r"(dst_y),         // %1
-    "+r"(pix)            // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
-  );
-}
-
-void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
-    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
-    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
-    "vmov.u8    d7, #16                        \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of BGRA.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q8, d1, d4                     \n"  // R
-    "vmlal.u8   q8, d2, d5                     \n"  // G
-    "vmlal.u8   q8, d3, d6                     \n"  // B
-    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d7                         \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_bgra),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
-  );
-}
-
-void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
-    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
-    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
-    "vmov.u8    d7, #16                        \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ABGR.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q8, d0, d4                     \n"  // R
-    "vmlal.u8   q8, d1, d5                     \n"  // G
-    "vmlal.u8   q8, d2, d6                     \n"  // B
-    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d7                         \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_abgr),  // %0
-    "+r"(dst_y),  // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
-  );
-}
-
-void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
-    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
-    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
-    "vmov.u8    d7, #16                        \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of RGBA.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q8, d1, d4                     \n"  // B
-    "vmlal.u8   q8, d2, d5                     \n"  // G
-    "vmlal.u8   q8, d3, d6                     \n"  // R
-    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d7                         \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_rgba),  // %0
-    "+r"(dst_y),  // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
-  );
-}
-
-void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
-    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
-    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
-    "vmov.u8    d7, #16                        \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RGB24.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q8, d0, d4                     \n"  // B
-    "vmlal.u8   q8, d1, d5                     \n"  // G
-    "vmlal.u8   q8, d2, d6                     \n"  // R
-    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d7                         \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_rgb24),  // %0
-    "+r"(dst_y),  // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
-  );
-}
-
-void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
-  asm volatile (
-    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
-    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
-    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
-    "vmov.u8    d7, #16                        \n"  // Add 16 constant
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RAW.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q8, d0, d4                     \n"  // B
-    "vmlal.u8   q8, d1, d5                     \n"  // G
-    "vmlal.u8   q8, d2, d6                     \n"  // R
-    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
-    "vqadd.u8   d0, d7                         \n"
-    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
-    "bgt        1b                             \n"
-  : "+r"(src_raw),  // %0
-    "+r"(dst_y),  // %1
-    "+r"(pix)        // %2
-  :
-  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
-  );
-}
-
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_NEON(uint8* dst_ptr,
-                         const uint8* src_ptr, ptrdiff_t src_stride,
-                         int dst_width, int source_y_fraction) {
-  asm volatile (
-    "cmp        %4, #0                         \n"
-    "beq        100f                           \n"
-    "add        %2, %1                         \n"
-    "cmp        %4, #64                        \n"
-    "beq        75f                            \n"
-    "cmp        %4, #128                       \n"
-    "beq        50f                            \n"
-    "cmp        %4, #192                       \n"
-    "beq        25f                            \n"
-
-    "vdup.8     d5, %4                         \n"
-    "rsb        %4, #256                       \n"
-    "vdup.8     d4, %4                         \n"
-    // General purpose row blend.
-  "1:                                          \n"
-    "vld1.8     {q0}, [%1]!                    \n"
-    "vld1.8     {q1}, [%2]!                    \n"
-    "subs       %3, %3, #16                    \n"
-    "vmull.u8   q13, d0, d4                    \n"
-    "vmull.u8   q14, d1, d4                    \n"
-    "vmlal.u8   q13, d2, d5                    \n"
-    "vmlal.u8   q14, d3, d5                    \n"
-    "vrshrn.u16 d0, q13, #8                    \n"
-    "vrshrn.u16 d1, q14, #8                    \n"
-    "vst1.8     {q0}, [%0]!                    \n"
-    "bgt        1b                             \n"
-    "b          99f                            \n"
-
-    // Blend 25 / 75.
-  "25:                                         \n"
-    "vld1.8     {q0}, [%1]!                    \n"
-    "vld1.8     {q1}, [%2]!                    \n"
-    "subs       %3, %3, #16                    \n"
-    "vrhadd.u8  q0, q1                         \n"
-    "vrhadd.u8  q0, q1                         \n"
-    "vst1.8     {q0}, [%0]!                    \n"
-    "bgt        25b                            \n"
-    "b          99f                            \n"
-
-    // Blend 50 / 50.
-  "50:                                         \n"
-    "vld1.8     {q0}, [%1]!                    \n"
-    "vld1.8     {q1}, [%2]!                    \n"
-    "subs       %3, %3, #16                    \n"
-    "vrhadd.u8  q0, q1                         \n"
-    "vst1.8     {q0}, [%0]!                    \n"
-    "bgt        50b                            \n"
-    "b          99f                            \n"
-
-    // Blend 75 / 25.
-  "75:                                         \n"
-    "vld1.8     {q1}, [%1]!                    \n"
-    "vld1.8     {q0}, [%2]!                    \n"
-    "subs       %3, %3, #16                    \n"
-    "vrhadd.u8  q0, q1                         \n"
-    "vrhadd.u8  q0, q1                         \n"
-    "vst1.8     {q0}, [%0]!                    \n"
-    "bgt        75b                            \n"
-    "b          99f                            \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-  "100:                                        \n"
-    "vld1.8     {q0}, [%1]!                    \n"
-    "subs       %3, %3, #16                    \n"
-    "vst1.8     {q0}, [%0]!                    \n"
-    "bgt        100b                           \n"
-
-  "99:                                         \n"
-  : "+r"(dst_ptr),          // %0
-    "+r"(src_ptr),          // %1
-    "+r"(src_stride),       // %2
-    "+r"(dst_width),        // %3
-    "+r"(source_y_fraction) // %4
-  :
-  : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
-  );
-}
-
-// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
-void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
-                       uint8* dst_argb, int width) {
-  asm volatile (
-    "subs       %3, #8                         \n"
-    "blt        89f                            \n"
-    // Blend 8 pixels.
-  "8:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB0.
-    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 pixels of ARGB1.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q10, d4, d3                    \n"  // db * a
-    "vmull.u8   q11, d5, d3                    \n"  // dg * a
-    "vmull.u8   q12, d6, d3                    \n"  // dr * a
-    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
-    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
-    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
-    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
-    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
-    "vqadd.u8   q0, q0, q2                     \n"  // + sbg
-    "vqadd.u8   d2, d2, d6                     \n"  // + sr
-    "vmov.u8    d3, #255                       \n"  // a = 255
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 pixels of ARGB.
-    "bge        8b                             \n"
-
-  "89:                                         \n"
-    "adds       %3, #8-1                       \n"
-    "blt        99f                            \n"
-
-    // Blend 1 pixels.
-  "1:                                          \n"
-    "vld4.8     {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n"  // load 1 pixel ARGB0.
-    "vld4.8     {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n"  // load 1 pixel ARGB1.
-    "subs       %3, %3, #1                     \n"  // 1 processed per loop.
-    "vmull.u8   q10, d4, d3                    \n"  // db * a
-    "vmull.u8   q11, d5, d3                    \n"  // dg * a
-    "vmull.u8   q12, d6, d3                    \n"  // dr * a
-    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
-    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
-    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
-    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
-    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
-    "vqadd.u8   q0, q0, q2                     \n"  // + sbg
-    "vqadd.u8   d2, d2, d6                     \n"  // + sr
-    "vmov.u8    d3, #255                       \n"  // a = 255
-    "vst4.8     {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n"  // store 1 pixel.
-    "bge        1b                             \n"
-
-  "99:                                         \n"
-
-  : "+r"(src_argb0),    // %0
-    "+r"(src_argb1),    // %1
-    "+r"(dst_argb),     // %2
-    "+r"(width)         // %3
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"
-  );
-}
-
-// Attenuate 8 pixels at a time.
-void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
-  asm volatile (
-    // Attenuate 8 pixels.
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q10, d0, d3                    \n"  // b * a
-    "vmull.u8   q11, d1, d3                    \n"  // g * a
-    "vmull.u8   q12, d2, d3                    \n"  // r * a
-    "vqrshrn.u16 d0, q10, #8                   \n"  // b >>= 8
-    "vqrshrn.u16 d1, q11, #8                   \n"  // g >>= 8
-    "vqrshrn.u16 d2, q12, #8                   \n"  // r >>= 8
-    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),   // %0
-    "+r"(dst_argb),   // %1
-    "+r"(width)       // %2
-  :
-  : "cc", "memory", "q0", "q1", "q10", "q11", "q12"
-  );
-}
-
-// Quantize 8 ARGB pixels (32 bytes).
-// dst = (dst * scale >> 16) * interval_size + interval_offset;
-void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
-                          int interval_offset, int width) {
-  asm volatile (
-    "vdup.u16   q8, %2                         \n"
-    "vshr.u16   q8, q8, #1                     \n"  // scale >>= 1
-    "vdup.u16   q9, %3                         \n"  // interval multiply.
-    "vdup.u16   q10, %4                        \n"  // interval add
-
-    // 8 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]         \n"  // load 8 pixels of ARGB.
-    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
-    "vmovl.u8   q0, d0                         \n"  // b (0 .. 255)
-    "vmovl.u8   q1, d2                         \n"
-    "vmovl.u8   q2, d4                         \n"
-    "vqdmulh.s16 q0, q0, q8                    \n"  // b * scale
-    "vqdmulh.s16 q1, q1, q8                    \n"  // g
-    "vqdmulh.s16 q2, q2, q8                    \n"  // r
-    "vmul.u16   q0, q0, q9                     \n"  // b * interval_size
-    "vmul.u16   q1, q1, q9                     \n"  // g
-    "vmul.u16   q2, q2, q9                     \n"  // r
-    "vadd.u16   q0, q0, q10                    \n"  // b + interval_offset
-    "vadd.u16   q1, q1, q10                    \n"  // g
-    "vadd.u16   q2, q2, q10                    \n"  // r
-    "vqmovn.u16 d0, q0                         \n"
-    "vqmovn.u16 d2, q1                         \n"
-    "vqmovn.u16 d4, q2                         \n"
-    "vst4.8     {d0, d2, d4, d6}, [%0]!        \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(dst_argb),       // %0
-    "+r"(width)           // %1
-  : "r"(scale),           // %2
-    "r"(interval_size),   // %3
-    "r"(interval_offset)  // %4
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10"
-  );
-}
-
-// Shade 8 pixels at a time by specified value.
-// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
-// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
-void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
-                       uint32 value) {
-  asm volatile (
-    "vdup.u32   q0, %3                         \n"  // duplicate scale value.
-    "vzip.u8    d0, d1                         \n"  // d0 aarrggbb.
-    "vshr.u16   q0, q0, #1                     \n"  // scale / 2.
-
-    // 8 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d20, d22, d24, d26}, [%0]!    \n"  // load 8 pixels of ARGB.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmovl.u8   q10, d20                       \n"  // b (0 .. 255)
-    "vmovl.u8   q11, d22                       \n"
-    "vmovl.u8   q12, d24                       \n"
-    "vmovl.u8   q13, d26                       \n"
-    "vqrdmulh.s16 q10, q10, d0[0]              \n"  // b * scale * 2
-    "vqrdmulh.s16 q11, q11, d0[1]              \n"  // g
-    "vqrdmulh.s16 q12, q12, d0[2]              \n"  // r
-    "vqrdmulh.s16 q13, q13, d0[3]              \n"  // a
-    "vqmovn.u16 d20, q10                       \n"
-    "vqmovn.u16 d22, q11                       \n"
-    "vqmovn.u16 d24, q12                       \n"
-    "vqmovn.u16 d26, q13                       \n"
-    "vst4.8     {d20, d22, d24, d26}, [%1]!    \n"  // store 8 pixels of ARGB.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),       // %0
-    "+r"(dst_argb),       // %1
-    "+r"(width)           // %2
-  : "r"(value)            // %3
-  : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
-  );
-}
-
-// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
-// Similar to ARGBToYJ but stores ARGB.
-// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
-void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
-  asm volatile (
-    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
-    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
-    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q2, d0, d24                    \n"  // B
-    "vmlal.u8   q2, d1, d25                    \n"  // G
-    "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit B
-    "vmov       d1, d0                         \n"  // G
-    "vmov       d2, d0                         \n"  // R
-    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(width)      // %2
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
-  );
-}
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-//    b = (r * 35 + g * 68 + b * 17) >> 7
-//    g = (r * 45 + g * 88 + b * 22) >> 7
-//    r = (r * 50 + g * 98 + b * 24) >> 7
-void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
-  asm volatile (
-    "vmov.u8    d20, #17                       \n"  // BB coefficient
-    "vmov.u8    d21, #68                       \n"  // BG coefficient
-    "vmov.u8    d22, #35                       \n"  // BR coefficient
-    "vmov.u8    d24, #22                       \n"  // GB coefficient
-    "vmov.u8    d25, #88                       \n"  // GG coefficient
-    "vmov.u8    d26, #45                       \n"  // GR coefficient
-    "vmov.u8    d28, #24                       \n"  // BB coefficient
-    "vmov.u8    d29, #98                       \n"  // BG coefficient
-    "vmov.u8    d30, #50                       \n"  // BR coefficient
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]         \n"  // load 8 ARGB pixels.
-    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q2, d0, d20                    \n"  // B to Sepia B
-    "vmlal.u8   q2, d1, d21                    \n"  // G
-    "vmlal.u8   q2, d2, d22                    \n"  // R
-    "vmull.u8   q3, d0, d24                    \n"  // B to Sepia G
-    "vmlal.u8   q3, d1, d25                    \n"  // G
-    "vmlal.u8   q3, d2, d26                    \n"  // R
-    "vmull.u8   q8, d0, d28                    \n"  // B to Sepia R
-    "vmlal.u8   q8, d1, d29                    \n"  // G
-    "vmlal.u8   q8, d2, d30                    \n"  // R
-    "vqshrn.u16 d0, q2, #7                     \n"  // 16 bit to 8 bit B
-    "vqshrn.u16 d1, q3, #7                     \n"  // 16 bit to 8 bit G
-    "vqshrn.u16 d2, q8, #7                     \n"  // 16 bit to 8 bit R
-    "vst4.8     {d0, d1, d2, d3}, [%0]!        \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-  : "+r"(dst_argb),  // %0
-    "+r"(width)      // %1
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3",
-    "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// Tranform 8 ARGB pixels (32 bytes) with color matrix.
-// TODO(fbarchard): Was same as Sepia except matrix is provided.  This function
-// needs to saturate.  Consider doing a non-saturating version.
-void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
-                             const int8* matrix_argb, int width) {
-  asm volatile (
-    "vld1.8     {q2}, [%3]                     \n"  // load 3 ARGB vectors.
-    "vmovl.s8   q0, d4                         \n"  // B,G coefficients s16.
-    "vmovl.s8   q1, d5                         \n"  // R,A coefficients s16.
-
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d16, d18, d20, d22}, [%0]!    \n"  // load 8 ARGB pixels.
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
-    "vmovl.u8   q8, d16                        \n"  // b (0 .. 255) 16 bit
-    "vmovl.u8   q9, d18                        \n"  // g
-    "vmovl.u8   q10, d20                       \n"  // r
-    "vmovl.u8   q15, d22                       \n"  // a
-    "vmul.s16   q12, q8, d0[0]                 \n"  // B = B * Matrix B
-    "vmul.s16   q13, q8, d1[0]                 \n"  // G = B * Matrix G
-    "vmul.s16   q14, q8, d2[0]                 \n"  // R = B * Matrix R
-    "vmul.s16   q15, q8, d3[0]                 \n"  // A = B * Matrix A
-    "vmul.s16   q4, q9, d0[1]                  \n"  // B += G * Matrix B
-    "vmul.s16   q5, q9, d1[1]                  \n"  // G += G * Matrix G
-    "vmul.s16   q6, q9, d2[1]                  \n"  // R += G * Matrix R
-    "vmul.s16   q7, q9, d3[1]                  \n"  // A += G * Matrix A
-    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
-    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
-    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
-    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
-    "vmul.s16   q4, q10, d0[2]                 \n"  // B += R * Matrix B
-    "vmul.s16   q5, q10, d1[2]                 \n"  // G += R * Matrix G
-    "vmul.s16   q6, q10, d2[2]                 \n"  // R += R * Matrix R
-    "vmul.s16   q7, q10, d3[2]                 \n"  // A += R * Matrix A
-    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
-    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
-    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
-    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
-    "vmul.s16   q4, q15, d0[3]                 \n"  // B += A * Matrix B
-    "vmul.s16   q5, q15, d1[3]                 \n"  // G += A * Matrix G
-    "vmul.s16   q6, q15, d2[3]                 \n"  // R += A * Matrix R
-    "vmul.s16   q7, q15, d3[3]                 \n"  // A += A * Matrix A
-    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
-    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
-    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
-    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
-    "vqshrun.s16 d16, q12, #6                  \n"  // 16 bit to 8 bit B
-    "vqshrun.s16 d18, q13, #6                  \n"  // 16 bit to 8 bit G
-    "vqshrun.s16 d20, q14, #6                  \n"  // 16 bit to 8 bit R
-    "vqshrun.s16 d22, q15, #6                  \n"  // 16 bit to 8 bit A
-    "vst4.8     {d16, d18, d20, d22}, [%1]!    \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-  : "+r"(src_argb),   // %0
-    "+r"(dst_argb),   // %1
-    "+r"(width)       // %2
-  : "r"(matrix_argb)  // %3
-  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
-    "q10", "q11", "q12", "q13", "q14", "q15"
-  );
-}
-
-// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
-#ifdef HAS_ARGBMULTIPLYROW_NEON
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  asm volatile (
-    // 8 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vmull.u8   q0, d0, d1                     \n"  // multiply B
-    "vmull.u8   q1, d2, d3                     \n"  // multiply G
-    "vmull.u8   q2, d4, d5                     \n"  // multiply R
-    "vmull.u8   q3, d6, d7                     \n"  // multiply A
-    "vrshrn.u16 d0, q0, #8                     \n"  // 16 bit to 8 bit B
-    "vrshrn.u16 d1, q1, #8                     \n"  // 16 bit to 8 bit G
-    "vrshrn.u16 d2, q2, #8                     \n"  // 16 bit to 8 bit R
-    "vrshrn.u16 d3, q3, #8                     \n"  // 16 bit to 8 bit A
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3"
-  );
-}
-#endif  // HAS_ARGBMULTIPLYROW_NEON
-
-// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
-                     uint8* dst_argb, int width) {
-  asm volatile (
-    // 8 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vqadd.u8   q0, q0, q2                     \n"  // add B, G
-    "vqadd.u8   q1, q1, q3                     \n"  // add R, A
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3"
-  );
-}
-
-// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
-void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  asm volatile (
-    // 8 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vqsub.u8   q0, q0, q2                     \n"  // subtract B, G
-    "vqsub.u8   q1, q1, q3                     \n"  // subtract R, A
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "cc", "memory", "q0", "q1", "q2", "q3"
-  );
-}
-
-// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
-                     uint8* dst_argb, int width) {
-  asm volatile (
-    "vmov.u8    d3, #255                       \n"  // alpha
-    // 8 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {d0}, [%0]!                    \n"  // load 8 sobelx.
-    "vld1.8     {d1}, [%1]!                    \n"  // load 8 sobely.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vqadd.u8   d0, d0, d1                     \n"  // add
-    "vmov.u8    d1, d0                         \n"
-    "vmov.u8    d2, d0                         \n"
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-  : "+r"(src_sobelx),  // %0
-    "+r"(src_sobely),  // %1
-    "+r"(dst_argb),    // %2
-    "+r"(width)        // %3
-  :
-  : "cc", "memory", "q0", "q1"
-  );
-}
-
-// Adds Sobel X and Sobel Y and stores Sobel into plane.
-void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
-                          uint8* dst_y, int width) {
-  asm volatile (
-    // 16 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"  // load 16 sobelx.
-    "vld1.8     {q1}, [%1]!                    \n"  // load 16 sobely.
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
-    "vqadd.u8   q0, q0, q1                     \n"  // add
-    "vst1.8     {q0}, [%2]!                    \n"  // store 16 pixels.
-    "bgt        1b                             \n"
-  : "+r"(src_sobelx),  // %0
-    "+r"(src_sobely),  // %1
-    "+r"(dst_y),       // %2
-    "+r"(width)        // %3
-  :
-  : "cc", "memory", "q0", "q1"
-  );
-}
-
-// Mixes Sobel X, Sobel Y and Sobel into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
-                     uint8* dst_argb, int width) {
-  asm volatile (
-    "vmov.u8    d3, #255                       \n"  // alpha
-    // 8 pixel loop.
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {d2}, [%0]!                    \n"  // load 8 sobelx.
-    "vld1.8     {d0}, [%1]!                    \n"  // load 8 sobely.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vqadd.u8   d1, d0, d2                     \n"  // add
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
-    "bgt        1b                             \n"
-  : "+r"(src_sobelx),  // %0
-    "+r"(src_sobely),  // %1
-    "+r"(dst_argb),    // %2
-    "+r"(width)        // %3
-  :
-  : "cc", "memory", "q0", "q1"
-  );
-}
-
-// SobelX as a matrix is
-// -1  0  1
-// -2  0  2
-// -1  0  1
-void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobelx, int width) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {d0}, [%0],%5                  \n"  // top
-    "vld1.8     {d1}, [%0],%6                  \n"
-    "vsubl.u8   q0, d0, d1                     \n"
-    "vld1.8     {d2}, [%1],%5                  \n"  // center * 2
-    "vld1.8     {d3}, [%1],%6                  \n"
-    "vsubl.u8   q1, d2, d3                     \n"
-    "vadd.s16   q0, q0, q1                     \n"
-    "vadd.s16   q0, q0, q1                     \n"
-    "vld1.8     {d2}, [%2],%5                  \n"  // bottom
-    "vld1.8     {d3}, [%2],%6                  \n"
-    "subs       %4, %4, #8                     \n"  // 8 pixels
-    "vsubl.u8   q1, d2, d3                     \n"
-    "vadd.s16   q0, q0, q1                     \n"
-    "vabs.s16   q0, q0                         \n"
-    "vqmovn.u16 d0, q0                         \n"
-    "vst1.8     {d0}, [%3]!                    \n"  // store 8 sobelx
-    "bgt        1b                             \n"
-  : "+r"(src_y0),      // %0
-    "+r"(src_y1),      // %1
-    "+r"(src_y2),      // %2
-    "+r"(dst_sobelx),  // %3
-    "+r"(width)        // %4
-  : "r"(2),            // %5
-    "r"(6)             // %6
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-
-// SobelY as a matrix is
-// -1 -2 -1
-//  0  0  0
-//  1  2  1
-void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {d0}, [%0],%4                  \n"  // left
-    "vld1.8     {d1}, [%1],%4                  \n"
-    "vsubl.u8   q0, d0, d1                     \n"
-    "vld1.8     {d2}, [%0],%4                  \n"  // center * 2
-    "vld1.8     {d3}, [%1],%4                  \n"
-    "vsubl.u8   q1, d2, d3                     \n"
-    "vadd.s16   q0, q0, q1                     \n"
-    "vadd.s16   q0, q0, q1                     \n"
-    "vld1.8     {d2}, [%0],%5                  \n"  // right
-    "vld1.8     {d3}, [%1],%5                  \n"
-    "subs       %3, %3, #8                     \n"  // 8 pixels
-    "vsubl.u8   q1, d2, d3                     \n"
-    "vadd.s16   q0, q0, q1                     \n"
-    "vabs.s16   q0, q0                         \n"
-    "vqmovn.u16 d0, q0                         \n"
-    "vst1.8     {d0}, [%2]!                    \n"  // store 8 sobely
-    "bgt        1b                             \n"
-  : "+r"(src_y0),      // %0
-    "+r"(src_y1),      // %1
-    "+r"(dst_sobely),  // %2
-    "+r"(width)        // %3
-  : "r"(1),            // %4
-    "r"(6)             // %5
-  : "cc", "memory", "q0", "q1"  // Clobber List
-  );
-}
-#endif  // __ARM_NEON__
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc
deleted file mode 100755
index 106fda5689..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc
+++ /dev/null
@@ -1,6443 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
-
-// Constants for ARGB
-static vec8 kARGBToY = {
-  13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
-};
-
-// JPeg full range.
-static vec8 kARGBToYJ = {
-  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
-};
-#endif  // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
-
-#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
-
-static vec8 kARGBToU = {
-  112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
-};
-
-static vec8 kARGBToUJ = {
-  127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
-};
-
-static vec8 kARGBToV = {
-  -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
-};
-
-static vec8 kARGBToVJ = {
-  -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
-};
-
-// Constants for BGRA
-static vec8 kBGRAToY = {
-  0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
-};
-
-static vec8 kBGRAToU = {
-  0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
-};
-
-static vec8 kBGRAToV = {
-  0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
-};
-
-// Constants for ABGR
-static vec8 kABGRToY = {
-  33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
-};
-
-static vec8 kABGRToU = {
-  -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
-};
-
-static vec8 kABGRToV = {
-  112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
-};
-
-// Constants for RGBA.
-static vec8 kRGBAToY = {
-  0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
-};
-
-static vec8 kRGBAToU = {
-  0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
-};
-
-static vec8 kRGBAToV = {
-  0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
-};
-
-static uvec8 kAddY16 = {
-  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
-};
-
-static vec16 kAddYJ64 = {
-  64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static uvec8 kAddUV128 = {
-  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
-  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
-};
-
-static uvec16 kAddUVJ128 = {
-  0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
-};
-#endif  // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
-
-#ifdef HAS_RGB24TOARGBROW_SSSE3
-
-// Shuffle table for converting RGB24 to ARGB.
-static uvec8 kShuffleMaskRGB24ToARGB = {
-  0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
-};
-
-// Shuffle table for converting RAW to ARGB.
-static uvec8 kShuffleMaskRAWToARGB = {
-  2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
-};
-
-// Shuffle table for converting ARGB to RGB24.
-static uvec8 kShuffleMaskARGBToRGB24 = {
-  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static uvec8 kShuffleMaskARGBToRAW = {
-  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGBToRGB24 for I422ToRGB24.  First 8 + next 4
-static uvec8 kShuffleMaskARGBToRGB24_0 = {
-  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static uvec8 kShuffleMaskARGBToRAW_0 = {
-  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
-};
-#endif  // HAS_RGB24TOARGBROW_SSSE3
-
-#if defined(TESTING) && defined(__x86_64__)
-void TestRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
-  asm volatile (
-    ".p2align  5                               \n"
-    "mov       %%eax,%%eax                     \n"
-    "mov       %%ebx,%%ebx                     \n"
-    "mov       %%ecx,%%ecx                     \n"
-    "mov       %%edx,%%edx                     \n"
-    "mov       %%esi,%%esi                     \n"
-    "mov       %%edi,%%edi                     \n"
-    "mov       %%ebp,%%ebp                     \n"
-    "mov       %%esp,%%esp                     \n"
-    ".p2align  5                               \n"
-    "mov       %%r8d,%%r8d                     \n"
-    "mov       %%r9d,%%r9d                     \n"
-    "mov       %%r10d,%%r10d                   \n"
-    "mov       %%r11d,%%r11d                   \n"
-    "mov       %%r12d,%%r12d                   \n"
-    "mov       %%r13d,%%r13d                   \n"
-    "mov       %%r14d,%%r14d                   \n"
-    "mov       %%r15d,%%r15d                   \n"
-    ".p2align  5                               \n"
-    "lea       (%%rax),%%eax                   \n"
-    "lea       (%%rbx),%%ebx                   \n"
-    "lea       (%%rcx),%%ecx                   \n"
-    "lea       (%%rdx),%%edx                   \n"
-    "lea       (%%rsi),%%esi                   \n"
-    "lea       (%%rdi),%%edi                   \n"
-    "lea       (%%rbp),%%ebp                   \n"
-    "lea       (%%rsp),%%esp                   \n"
-    ".p2align  5                               \n"
-    "lea       (%%r8),%%r8d                    \n"
-    "lea       (%%r9),%%r9d                    \n"
-    "lea       (%%r10),%%r10d                  \n"
-    "lea       (%%r11),%%r11d                  \n"
-    "lea       (%%r12),%%r12d                  \n"
-    "lea       (%%r13),%%r13d                  \n"
-    "lea       (%%r14),%%r14d                  \n"
-    "lea       (%%r15),%%r15d                  \n"
-
-    ".p2align  5                               \n"
-    "lea       0x10(%%rax),%%eax               \n"
-    "lea       0x10(%%rbx),%%ebx               \n"
-    "lea       0x10(%%rcx),%%ecx               \n"
-    "lea       0x10(%%rdx),%%edx               \n"
-    "lea       0x10(%%rsi),%%esi               \n"
-    "lea       0x10(%%rdi),%%edi               \n"
-    "lea       0x10(%%rbp),%%ebp               \n"
-    "lea       0x10(%%rsp),%%esp               \n"
-    ".p2align  5                               \n"
-    "lea       0x10(%%r8),%%r8d                \n"
-    "lea       0x10(%%r9),%%r9d                \n"
-    "lea       0x10(%%r10),%%r10d              \n"
-    "lea       0x10(%%r11),%%r11d              \n"
-    "lea       0x10(%%r12),%%r12d              \n"
-    "lea       0x10(%%r13),%%r13d              \n"
-    "lea       0x10(%%r14),%%r14d              \n"
-    "lea       0x10(%%r15),%%r15d              \n"
-
-    ".p2align  5                               \n"
-    "add       0x10,%%eax                      \n"
-    "add       0x10,%%ebx                      \n"
-    "add       0x10,%%ecx                      \n"
-    "add       0x10,%%edx                      \n"
-    "add       0x10,%%esi                      \n"
-    "add       0x10,%%edi                      \n"
-    "add       0x10,%%ebp                      \n"
-    "add       0x10,%%esp                      \n"
-    ".p2align  5                               \n"
-    "add       0x10,%%r8d                      \n"
-    "add       0x10,%%r9d                      \n"
-    "add       0x10,%%r10d                     \n"
-    "add       0x10,%%r11d                     \n"
-    "add       0x10,%%r12d                     \n"
-    "add       0x10,%%r13d                     \n"
-    "add       0x10,%%r14d                     \n"
-    "add       0x10,%%r15d                     \n"
-
-    ".p2align  2                               \n"
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_y),     // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // TESTING
-
-#ifdef HAS_I400TOARGBROW_SSE2
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pslld     $0x18,%%xmm5                    \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm0,%%xmm0                   \n"
-    "punpckhwd %%xmm1,%%xmm1                   \n"
-    "por       %%xmm5,%%xmm0                   \n"
-    "por       %%xmm5,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_y),     // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
-                                  int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pslld     $0x18,%%xmm5                    \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm0,%%xmm0                   \n"
-    "punpckhwd %%xmm1,%%xmm1                   \n"
-    "por       %%xmm5,%%xmm0                   \n"
-    "por       %%xmm5,%%xmm1                   \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_y),     // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_I400TOARGBROW_SSE2
-
-#ifdef HAS_RGB24TOARGBROW_SSSE3
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"  // generate mask 0xff000000
-    "pslld     $0x18,%%xmm5                    \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm3   \n"
-    "lea       " MEMLEA(0x30,0) ",%0           \n"
-    "movdqa    %%xmm3,%%xmm2                   \n"
-    "palignr   $0x8,%%xmm1,%%xmm2              \n"
-    "pshufb    %%xmm4,%%xmm2                   \n"
-    "por       %%xmm5,%%xmm2                   \n"
-    "palignr   $0xc,%%xmm0,%%xmm1              \n"
-    "pshufb    %%xmm4,%%xmm0                   \n"
-    "movdqa    %%xmm2," MEMACCESS2(0x20,1) "   \n"
-    "por       %%xmm5,%%xmm0                   \n"
-    "pshufb    %%xmm4,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "por       %%xmm5,%%xmm1                   \n"
-    "palignr   $0x4,%%xmm3,%%xmm3              \n"
-    "pshufb    %%xmm4,%%xmm3                   \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "por       %%xmm5,%%xmm3                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm3," MEMACCESS2(0x30,1) "   \n"
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_rgb24),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  : "m"(kShuffleMaskRGB24ToARGB)  // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"  // generate mask 0xff000000
-    "pslld     $0x18,%%xmm5                    \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm3   \n"
-    "lea       " MEMLEA(0x30,0) ",%0           \n"
-    "movdqa    %%xmm3,%%xmm2                   \n"
-    "palignr   $0x8,%%xmm1,%%xmm2              \n"
-    "pshufb    %%xmm4,%%xmm2                   \n"
-    "por       %%xmm5,%%xmm2                   \n"
-    "palignr   $0xc,%%xmm0,%%xmm1              \n"
-    "pshufb    %%xmm4,%%xmm0                   \n"
-    "movdqa    %%xmm2," MEMACCESS2(0x20,1) "   \n"
-    "por       %%xmm5,%%xmm0                   \n"
-    "pshufb    %%xmm4,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "por       %%xmm5,%%xmm1                   \n"
-    "palignr   $0x4,%%xmm3,%%xmm3              \n"
-    "pshufb    %%xmm4,%%xmm3                   \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "por       %%xmm5,%%xmm3                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm3," MEMACCESS2(0x30,1) "   \n"
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_raw),   // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  : "m"(kShuffleMaskRAWToARGB)  // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "mov       $0x1080108,%%eax                \n"
-    "movd      %%eax,%%xmm5                    \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    "mov       $0x20802080,%%eax               \n"
-    "movd      %%eax,%%xmm6                    \n"
-    "pshufd    $0x0,%%xmm6,%%xmm6              \n"
-    "pcmpeqb   %%xmm3,%%xmm3                   \n"
-    "psllw     $0xb,%%xmm3                     \n"
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "psllw     $0xa,%%xmm4                     \n"
-    "psrlw     $0x5,%%xmm4                     \n"
-    "pcmpeqb   %%xmm7,%%xmm7                   \n"
-    "psllw     $0x8,%%xmm7                     \n"
-    "sub       %0,%1                           \n"
-    "sub       %0,%1                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "pand      %%xmm3,%%xmm1                   \n"
-    "psllw     $0xb,%%xmm2                     \n"
-    "pmulhuw   %%xmm5,%%xmm1                   \n"
-    "pmulhuw   %%xmm5,%%xmm2                   \n"
-    "psllw     $0x8,%%xmm1                     \n"
-    "por       %%xmm2,%%xmm1                   \n"
-    "pand      %%xmm4,%%xmm0                   \n"
-    "pmulhuw   %%xmm6,%%xmm0                   \n"
-    "por       %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm1,%%xmm2                   \n"
-    "punpcklbw %%xmm0,%%xmm1                   \n"
-    "punpckhbw %%xmm0,%%xmm2                   \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm1,0x00,1,0,2)           //  movdqa  %%xmm1,(%1,%0,2)
-    MEMOPMEM(movdqa,xmm2,0x10,1,0,2)           //  movdqa  %%xmm2,0x10(%1,%0,2)
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  :
-  : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "mov       $0x1080108,%%eax                \n"
-    "movd      %%eax,%%xmm5                    \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    "mov       $0x42004200,%%eax               \n"
-    "movd      %%eax,%%xmm6                    \n"
-    "pshufd    $0x0,%%xmm6,%%xmm6              \n"
-    "pcmpeqb   %%xmm3,%%xmm3                   \n"
-    "psllw     $0xb,%%xmm3                     \n"
-    "movdqa    %%xmm3,%%xmm4                   \n"
-    "psrlw     $0x6,%%xmm4                     \n"
-    "pcmpeqb   %%xmm7,%%xmm7                   \n"
-    "psllw     $0x8,%%xmm7                     \n"
-    "sub       %0,%1                           \n"
-    "sub       %0,%1                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "psllw     $0x1,%%xmm1                     \n"
-    "psllw     $0xb,%%xmm2                     \n"
-    "pand      %%xmm3,%%xmm1                   \n"
-    "pmulhuw   %%xmm5,%%xmm2                   \n"
-    "pmulhuw   %%xmm5,%%xmm1                   \n"
-    "psllw     $0x8,%%xmm1                     \n"
-    "por       %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "pand      %%xmm4,%%xmm0                   \n"
-    "psraw     $0x8,%%xmm2                     \n"
-    "pmulhuw   %%xmm6,%%xmm0                   \n"
-    "pand      %%xmm7,%%xmm2                   \n"
-    "por       %%xmm2,%%xmm0                   \n"
-    "movdqa    %%xmm1,%%xmm2                   \n"
-    "punpcklbw %%xmm0,%%xmm1                   \n"
-    "punpckhbw %%xmm0,%%xmm2                   \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm1,0x00,1,0,2)           //  movdqa  %%xmm1,(%1,%0,2)
-    MEMOPMEM(movdqa,xmm2,0x10,1,0,2)           //  movdqa  %%xmm2,0x10(%1,%0,2)
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  :
-  : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "mov       $0xf0f0f0f,%%eax                \n"
-    "movd      %%eax,%%xmm4                    \n"
-    "pshufd    $0x0,%%xmm4,%%xmm4              \n"
-    "movdqa    %%xmm4,%%xmm5                   \n"
-    "pslld     $0x4,%%xmm5                     \n"
-    "sub       %0,%1                           \n"
-    "sub       %0,%1                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "pand      %%xmm4,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm3                   \n"
-    "psllw     $0x4,%%xmm1                     \n"
-    "psrlw     $0x4,%%xmm3                     \n"
-    "por       %%xmm1,%%xmm0                   \n"
-    "por       %%xmm3,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm2,%%xmm0                   \n"
-    "punpckhbw %%xmm2,%%xmm1                   \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,2)           //  movdqa  %%xmm0,(%1,%0,2)
-    MEMOPMEM(movdqa,xmm1,0x10,1,0,2)           //  movdqa  %%xmm1,0x10(%1,%0,2)
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  :
-  : "memory", "cc", "eax"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "movdqa    %3,%%xmm6                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "pshufb    %%xmm6,%%xmm0                   \n"
-    "pshufb    %%xmm6,%%xmm1                   \n"
-    "pshufb    %%xmm6,%%xmm2                   \n"
-    "pshufb    %%xmm6,%%xmm3                   \n"
-    "movdqa    %%xmm1,%%xmm4                   \n"
-    "psrldq    $0x4,%%xmm1                     \n"
-    "pslldq    $0xc,%%xmm4                     \n"
-    "movdqa    %%xmm2,%%xmm5                   \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pslldq    $0x8,%%xmm5                     \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "por       %%xmm5,%%xmm1                   \n"
-    "psrldq    $0x8,%%xmm2                     \n"
-    "pslldq    $0x4,%%xmm3                     \n"
-    "por       %%xmm3,%%xmm2                   \n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "movdqu    %%xmm2," MEMACCESS2(0x20,1) "   \n"
-    "lea       " MEMLEA(0x30,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  : "m"(kShuffleMaskARGBToRGB24)  // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-
-void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "movdqa    %3,%%xmm6                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "pshufb    %%xmm6,%%xmm0                   \n"
-    "pshufb    %%xmm6,%%xmm1                   \n"
-    "pshufb    %%xmm6,%%xmm2                   \n"
-    "pshufb    %%xmm6,%%xmm3                   \n"
-    "movdqa    %%xmm1,%%xmm4                   \n"
-    "psrldq    $0x4,%%xmm1                     \n"
-    "pslldq    $0xc,%%xmm4                     \n"
-    "movdqa    %%xmm2,%%xmm5                   \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pslldq    $0x8,%%xmm5                     \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "por       %%xmm5,%%xmm1                   \n"
-    "psrldq    $0x8,%%xmm2                     \n"
-    "pslldq    $0x4,%%xmm3                     \n"
-    "por       %%xmm3,%%xmm2                   \n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "movdqu    %%xmm2," MEMACCESS2(0x20,1) "   \n"
-    "lea       " MEMLEA(0x30,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  : "m"(kShuffleMaskARGBToRAW)  // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-
-void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm3,%%xmm3                   \n"
-    "psrld     $0x1b,%%xmm3                    \n"
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "psrld     $0x1a,%%xmm4                    \n"
-    "pslld     $0x5,%%xmm4                     \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pslld     $0xb,%%xmm5                     \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "pslld     $0x8,%%xmm0                     \n"
-    "psrld     $0x3,%%xmm1                     \n"
-    "psrld     $0x5,%%xmm2                     \n"
-    "psrad     $0x10,%%xmm0                    \n"
-    "pand      %%xmm3,%%xmm1                   \n"
-    "pand      %%xmm4,%%xmm2                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "por       %%xmm2,%%xmm1                   \n"
-    "por       %%xmm1,%%xmm0                   \n"
-    "packssdw  %%xmm0,%%xmm0                   \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x4,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "psrld     $0x1b,%%xmm4                    \n"
-    "movdqa    %%xmm4,%%xmm5                   \n"
-    "pslld     $0x5,%%xmm5                     \n"
-    "movdqa    %%xmm4,%%xmm6                   \n"
-    "pslld     $0xa,%%xmm6                     \n"
-    "pcmpeqb   %%xmm7,%%xmm7                   \n"
-    "pslld     $0xf,%%xmm7                     \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm3                   \n"
-    "psrad     $0x10,%%xmm0                    \n"
-    "psrld     $0x3,%%xmm1                     \n"
-    "psrld     $0x6,%%xmm2                     \n"
-    "psrld     $0x9,%%xmm3                     \n"
-    "pand      %%xmm7,%%xmm0                   \n"
-    "pand      %%xmm4,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm2                   \n"
-    "pand      %%xmm6,%%xmm3                   \n"
-    "por       %%xmm1,%%xmm0                   \n"
-    "por       %%xmm3,%%xmm2                   \n"
-    "por       %%xmm2,%%xmm0                   \n"
-    "packssdw  %%xmm0,%%xmm0                   \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMACCESS2(0x8,1) ",%1        \n"
-    "sub       $0x4,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "psllw     $0xc,%%xmm4                     \n"
-    "movdqa    %%xmm4,%%xmm3                   \n"
-    "psrlw     $0x8,%%xmm3                     \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm3,%%xmm0                   \n"
-    "pand      %%xmm4,%%xmm1                   \n"
-    "psrlq     $0x4,%%xmm0                     \n"
-    "psrlq     $0x8,%%xmm1                     \n"
-    "por       %%xmm1,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x4,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(pix)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
-  );
-}
-#endif  // HAS_RGB24TOARGBROW_SSSE3
-
-#ifdef HAS_ARGBTOYROW_SSSE3
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kARGBToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kARGBToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBTOYROW_SSSE3
-
-#ifdef HAS_ARGBTOYJROW_SSSE3
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %3,%%xmm4                       \n"
-    "movdqa    %4,%%xmm5                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "paddw     %%xmm5,%%xmm0                   \n"
-    "paddw     %%xmm5,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kARGBToYJ),  // %3
-    "m"(kAddYJ64)    // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %3,%%xmm4                       \n"
-    "movdqa    %4,%%xmm5                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "paddw     %%xmm5,%%xmm0                   \n"
-    "paddw     %%xmm5,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kARGBToYJ),  // %3
-    "m"(kAddYJ64)    // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBTOYJROW_SSSE3
-
-#ifdef HAS_ARGBTOUVROW_SSSE3
-// TODO(fbarchard): pass xmm constants to single block of assembly.
-// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
-// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
-// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around
-// and considered unsafe.
-void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToU),  // %0
-    "m"(kARGBToV),  // %1
-    "m"(kAddUV128)  // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
-    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
-    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
-    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps    %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_argb)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                        uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToUJ),  // %0
-    "m"(kARGBToVJ),  // %1
-    "m"(kAddUVJ128)  // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
-    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
-    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
-    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "paddw     %%xmm5,%%xmm0                   \n"
-    "paddw     %%xmm5,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_argb)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToU),         // %0
-    "m"(kARGBToV),         // %1
-    "m"(kAddUV128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm1                   \n"
-    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_argb)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                                  uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToUJ),         // %0
-    "m"(kARGBToVJ),         // %1
-    "m"(kAddUVJ128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm1                   \n"
-    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "paddw     %%xmm5,%%xmm0                   \n"
-    "paddw     %%xmm5,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_argb))
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-                          int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToU),  // %0
-    "m"(kARGBToV),  // %1
-    "m"(kAddUV128)  // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm6                   \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm2                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm2                     \n"
-    "packsswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    "pmaddubsw %%xmm3,%%xmm0                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm2                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm2                     \n"
-    "packsswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,2,1)           //  movdqa  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),        // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6"
-#endif
-  );
-}
-
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u,
-                                    uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToU),  // %0
-    "m"(kARGBToV),  // %1
-    "m"(kAddUV128)  // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm6                   \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm2                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm2                     \n"
-    "packsswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    "pmaddubsw %%xmm3,%%xmm0                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm2                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm2                     \n"
-    "packsswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,2,1)           //  movdqu  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),        // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6"
-#endif
-  );
-}
-
-void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
-                          uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToU),  // %0
-    "m"(kARGBToV),  // %1
-    "m"(kAddUV128)  // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
-                                    uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kARGBToU),  // %0
-    "m"(kARGBToV),  // %1
-    "m"(kAddUV128)  // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_bgra),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kBGRAToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_bgra),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kBGRAToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kBGRAToU),         // %0
-    "m"(kBGRAToV),         // %1
-    "m"(kAddUV128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
-    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
-    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
-    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_bgra0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_bgra)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kBGRAToU),         // %0
-    "m"(kBGRAToV),         // %1
-    "m"(kAddUV128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm1                   \n"
-    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_bgra0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_bgra)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_abgr),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kABGRToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_abgr),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kABGRToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_rgba),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kRGBAToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) {
-  asm volatile (
-    "movdqa    %4,%%xmm5                       \n"
-    "movdqa    %3,%%xmm4                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm4,%%xmm3                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "phaddw    %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm2                     \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_rgba),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  : "m"(kRGBAToY),   // %3
-    "m"(kAddY16)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kABGRToU),         // %0
-    "m"(kABGRToV),         // %1
-    "m"(kAddUV128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
-    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
-    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
-    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_abgr0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_abgr)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kABGRToU),         // %0
-    "m"(kABGRToV),         // %1
-    "m"(kAddUV128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm1                   \n"
-    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_abgr0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_abgr)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kRGBAToU),         // %0
-    "m"(kRGBAToV),         // %1
-    "m"(kAddUV128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(pavgb,0x00,0,4,1,xmm0)            //  pavgb   (%0,%4,1),%%xmm0
-    MEMOPREG(pavgb,0x10,0,4,1,xmm1)            //  pavgb   0x10(%0,%4,1),%%xmm1
-    MEMOPREG(pavgb,0x20,0,4,1,xmm2)            //  pavgb   0x20(%0,%4,1),%%xmm2
-    MEMOPREG(pavgb,0x30,0,4,1,xmm6)            //  pavgb   0x30(%0,%4,1),%%xmm6
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_rgba0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_rgba))
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kRGBAToU),         // %0
-    "m"(kRGBAToV),         // %1
-    "m"(kAddUV128)         // %2
-  );
-  asm volatile (
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqu    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqu    " MEMACCESS2(0x30,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x00,0,4,1,xmm7)           //  movdqu  (%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    MEMOPREG(movdqu,0x10,0,4,1,xmm7)           //  movdqu  0x10(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm1                   \n"
-    MEMOPREG(movdqu,0x20,0,4,1,xmm7)           //  movdqu  0x20(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    MEMOPREG(movdqu,0x30,0,4,1,xmm7)           //  movdqu  0x30(%0,%4,1),%%xmm7
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm7                   \n"
-    "shufps    $0x88,%%xmm6,%%xmm2             \n"
-    "shufps    $0xdd,%%xmm6,%%xmm7             \n"
-    "pavgb     %%xmm7,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm2                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "phaddw    %%xmm2,%%xmm0                   \n"
-    "phaddw    %%xmm6,%%xmm1                   \n"
-    "psraw     $0x8,%%xmm0                     \n"
-    "psraw     $0x8,%%xmm1                     \n"
-    "packsswb  %%xmm1,%%xmm0                   \n"
-    "paddb     %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movlps    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhps,xmm0,0x00,1,2,1)           //  movhps  %%xmm0,(%1,%2,1)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_rgba0),       // %0
-    "+r"(dst_u),           // %1
-    "+r"(dst_v),           // %2
-    "+rm"(width)           // %3
-  : "r"((intptr_t)(src_stride_rgba)) // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_ARGBTOUVROW_SSSE3
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-struct {
-  vec8 kUVToB;  // 0
-  vec8 kUVToG;  // 16
-  vec8 kUVToR;  // 32
-  vec16 kUVBiasB;  // 48
-  vec16 kUVBiasG;  // 64
-  vec16 kUVBiasR;  // 80
-  vec16 kYSub16;  // 96
-  vec16 kYToRgb;  // 112
-  vec8 kVUToB;  // 128
-  vec8 kVUToG;  // 144
-  vec8 kVUToR;  // 160
-} static SIMD_ALIGNED(kYuvConstants) = {
-  { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB },
-  { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
-  { UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR },
-  { BB, BB, BB, BB, BB, BB, BB, BB },
-  { BG, BG, BG, BG, BG, BG, BG, BG },
-  { BR, BR, BR, BR, BR, BR, BR, BR },
-  { 16, 16, 16, 16, 16, 16, 16, 16 },
-  { YG, YG, YG, YG, YG, YG, YG, YG },
-  { VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB },
-  { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
-  { VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR }
-};
-
-
-// Read 8 UV from 411
-#define READYUV444                                                             \
-    "movq       " MEMACCESS([u_buf]) ",%%xmm0                   \n"            \
-    BUNDLEALIGN                                                                \
-    MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1)                            \
-    "lea        " MEMLEA(0x8, [u_buf]) ",%[u_buf]               \n"            \
-    "punpcklbw  %%xmm1,%%xmm0                                   \n"
-
-// Read 4 UV from 422, upsample to 8 UV
-#define READYUV422                                                             \
-    "movd       " MEMACCESS([u_buf]) ",%%xmm0                   \n"            \
-    BUNDLEALIGN                                                                \
-    MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)                            \
-    "lea        " MEMLEA(0x4, [u_buf]) ",%[u_buf]               \n"            \
-    "punpcklbw  %%xmm1,%%xmm0                                   \n"            \
-    "punpcklwd  %%xmm0,%%xmm0                                   \n"
-
-// Read 2 UV from 411, upsample to 8 UV
-#define READYUV411                                                             \
-    "movd       " MEMACCESS([u_buf]) ",%%xmm0                   \n"            \
-    BUNDLEALIGN                                                                \
-    MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)                            \
-    "lea        " MEMLEA(0x2, [u_buf]) ",%[u_buf]               \n"            \
-    "punpcklbw  %%xmm1,%%xmm0                                   \n"            \
-    "punpcklwd  %%xmm0,%%xmm0                                   \n"            \
-    "punpckldq  %%xmm0,%%xmm0                                   \n"
-
-// Read 4 UV from NV12, upsample to 8 UV
-#define READNV12                                                               \
-    "movq       " MEMACCESS([uv_buf]) ",%%xmm0                  \n"            \
-    "lea        " MEMLEA(0x8, [uv_buf]) ",%[uv_buf]             \n"            \
-    "punpcklwd  %%xmm0,%%xmm0                                   \n"
-
-// Convert 8 pixels: 8 UV and 8 Y
-#define YUVTORGB                                                               \
-    "movdqa     %%xmm0,%%xmm1                                   \n"            \
-    "movdqa     %%xmm0,%%xmm2                                   \n"            \
-    "pmaddubsw  " MEMACCESS([kYuvConstants]) ",%%xmm0           \n"            \
-    "pmaddubsw  " MEMACCESS2(16, [kYuvConstants]) ",%%xmm1      \n"            \
-    "pmaddubsw  " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2      \n"            \
-    "psubw      " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0      \n"            \
-    "psubw      " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1      \n"            \
-    "psubw      " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2      \n"            \
-    "movq       " MEMACCESS([y_buf]) ",%%xmm3                   \n"            \
-    "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n"            \
-    "punpcklbw  %%xmm4,%%xmm3                                   \n"            \
-    "psubsw     " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3      \n"            \
-    "pmullw     " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3     \n"            \
-    "paddsw     %%xmm3,%%xmm0                                   \n"            \
-    "paddsw     %%xmm3,%%xmm1                                   \n"            \
-    "paddsw     %%xmm3,%%xmm2                                   \n"            \
-    "psraw      $0x6,%%xmm0                                     \n"            \
-    "psraw      $0x6,%%xmm1                                     \n"            \
-    "psraw      $0x6,%%xmm2                                     \n"            \
-    "packuswb   %%xmm0,%%xmm0                                   \n"            \
-    "packuswb   %%xmm1,%%xmm1                                   \n"            \
-    "packuswb   %%xmm2,%%xmm2                                   \n"
-
-// Convert 8 pixels: 8 VU and 8 Y
-#define YVUTORGB                                                               \
-    "movdqa     %%xmm0,%%xmm1                                   \n"            \
-    "movdqa     %%xmm0,%%xmm2                                   \n"            \
-    "pmaddubsw  " MEMACCESS2(128, [kYuvConstants]) ",%%xmm0     \n"            \
-    "pmaddubsw  " MEMACCESS2(144, [kYuvConstants]) ",%%xmm1     \n"            \
-    "pmaddubsw  " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2     \n"            \
-    "psubw      " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0      \n"            \
-    "psubw      " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1      \n"            \
-    "psubw      " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2      \n"            \
-    "movq       " MEMACCESS([y_buf]) ",%%xmm3                   \n"            \
-    "lea        " MEMLEA(0x8, [y_buf]) ",%[y_buf]               \n"            \
-    "punpcklbw  %%xmm4,%%xmm3                                   \n"            \
-    "psubsw     " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3      \n"            \
-    "pmullw     " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3     \n"            \
-    "paddsw     %%xmm3,%%xmm0                                   \n"            \
-    "paddsw     %%xmm3,%%xmm1                                   \n"            \
-    "paddsw     %%xmm3,%%xmm2                                   \n"            \
-    "psraw      $0x6,%%xmm0                                     \n"            \
-    "psraw      $0x6,%%xmm1                                     \n"            \
-    "psraw      $0x6,%%xmm2                                     \n"            \
-    "packuswb   %%xmm0,%%xmm0                                   \n"            \
-    "packuswb   %%xmm1,%%xmm1                                   \n"            \
-    "packuswb   %%xmm2,%%xmm2                                   \n"
-
-void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
-                                const uint8* u_buf,
-                                const uint8* v_buf,
-                                uint8* dst_argb,
-                                int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV444
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "   \n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb]  \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
-                                 const uint8* u_buf,
-                                 const uint8* v_buf,
-                                 uint8* dst_rgb24,
-                                 int width) {
-// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
-#if defined(__i386__)
-  asm volatile (
-    "movdqa    %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
-    "movdqa    %[kShuffleMaskARGBToRGB24],%%xmm6   \n"
-  :: [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
-    [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24));
-#endif
-
-  asm volatile (
-#if !defined(__i386__)
-    "movdqa    %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
-    "movdqa    %[kShuffleMaskARGBToRGB24],%%xmm6   \n"
-#endif
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm2,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "pshufb    %%xmm5,%%xmm0                   \n"
-    "pshufb    %%xmm6,%%xmm1                   \n"
-    "palignr   $0xc,%%xmm0,%%xmm1              \n"
-    "movq      %%xmm0," MEMACCESS([dst_rgb24]) "\n"
-    "movdqu    %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
-    "lea       " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_rgb24]"+r"(dst_rgb24),  // %[dst_rgb24]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
-#if !defined(__i386__)
-    , [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
-    [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
-#endif
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-
-void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* dst_raw,
-                               int width) {
-// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs.
-#if defined(__i386__)
-  asm volatile (
-    "movdqa    %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
-    "movdqa    %[kShuffleMaskARGBToRAW],%%xmm6   \n"
-  :: [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
-    [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW));
-#endif
-
-  asm volatile (
-#if !defined(__i386__)
-    "movdqa    %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
-    "movdqa    %[kShuffleMaskARGBToRAW],%%xmm6   \n"
-#endif
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm2,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "pshufb    %%xmm5,%%xmm0                   \n"
-    "pshufb    %%xmm6,%%xmm1                   \n"
-    "palignr   $0xc,%%xmm0,%%xmm1              \n"
-    "movq      %%xmm0," MEMACCESS([dst_raw]) " \n"
-    "movdqu    %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
-    "lea       " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_raw]"+r"(dst_raw),  // %[dst_raw]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)
-#if !defined(__i386__)
-    , [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
-    [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
-#endif
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-
-void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
-                                const uint8* u_buf,
-                                const uint8* v_buf,
-                                uint8* dst_argb,
-                                int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
-                                const uint8* u_buf,
-                                const uint8* v_buf,
-                                uint8* dst_argb,
-                                int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV411
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
-                                const uint8* uv_buf,
-                                uint8* dst_argb,
-                                int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READNV12
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-  // Does not use r14.
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
-                                const uint8* uv_buf,
-                                uint8* dst_argb,
-                                int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READNV12
-    YVUTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-  // Does not use r14.
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* u_buf,
-                                          const uint8* v_buf,
-                                          uint8* dst_argb,
-                                          int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV444
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* u_buf,
-                                          const uint8* v_buf,
-                                          uint8* dst_argb,
-                                          int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* u_buf,
-                                          const uint8* v_buf,
-                                          uint8* dst_argb,
-                                          int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV411
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* uv_buf,
-                                          uint8* dst_argb,
-                                          int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READNV12
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-  // Does not use r14.
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* uv_buf,
-                                          uint8* dst_argb,
-                                          int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READNV12
-    YVUTORGB
-    "punpcklbw %%xmm1,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm0                   \n"
-    "punpckhwd %%xmm2,%%xmm1                   \n"
-    "movdqu    %%xmm0," MEMACCESS([dst_argb]) "\n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n"
-    "lea       " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [uv_buf]"+r"(uv_buf),    // %[uv_buf]
-    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-  // Does not use r14.
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
-                                const uint8* u_buf,
-                                const uint8* v_buf,
-                                uint8* dst_bgra,
-                                int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "punpcklbw %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm2,%%xmm5                   \n"
-    "movdqa    %%xmm5,%%xmm0                   \n"
-    "punpcklwd %%xmm1,%%xmm5                   \n"
-    "punpckhwd %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm5," MEMACCESS([dst_bgra]) "\n"
-    "movdqa    %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
-    "lea       " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_bgra]"+r"(dst_bgra),  // %[dst_bgra]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
-                                const uint8* u_buf,
-                                const uint8* v_buf,
-                                uint8* dst_abgr,
-                                int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm2                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm1                   \n"
-    "punpcklwd %%xmm0,%%xmm2                   \n"
-    "punpckhwd %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2," MEMACCESS([dst_abgr]) "\n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
-    "lea       " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
-                                const uint8* u_buf,
-                                const uint8* v_buf,
-                                uint8* dst_rgba,
-                                int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "punpcklbw %%xmm2,%%xmm1                   \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "movdqa    %%xmm5,%%xmm0                   \n"
-    "punpcklwd %%xmm1,%%xmm5                   \n"
-    "punpckhwd %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm5," MEMACCESS([dst_rgba]) "\n"
-    "movdqa    %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
-    "lea       " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_rgba]"+r"(dst_rgba),  // %[dst_rgba]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* u_buf,
-                                          const uint8* v_buf,
-                                          uint8* dst_bgra,
-                                          int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "punpcklbw %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm2,%%xmm5                   \n"
-    "movdqa    %%xmm5,%%xmm0                   \n"
-    "punpcklwd %%xmm1,%%xmm5                   \n"
-    "punpckhwd %%xmm1,%%xmm0                   \n"
-    "movdqu    %%xmm5," MEMACCESS([dst_bgra]) "\n"
-    "movdqu    %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n"
-    "lea       " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_bgra]"+r"(dst_bgra),  // %[dst_bgra]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* u_buf,
-                                          const uint8* v_buf,
-                                          uint8* dst_abgr,
-                                          int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "punpcklbw %%xmm1,%%xmm2                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm1                   \n"
-    "punpcklwd %%xmm0,%%xmm2                   \n"
-    "punpckhwd %%xmm0,%%xmm1                   \n"
-    "movdqu    %%xmm2," MEMACCESS([dst_abgr]) "\n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n"
-    "lea       " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
-                                          const uint8* u_buf,
-                                          const uint8* v_buf,
-                                          uint8* dst_rgba,
-                                          int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUV422
-    YUVTORGB
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "punpcklbw %%xmm2,%%xmm1                   \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "movdqa    %%xmm5,%%xmm0                   \n"
-    "punpcklwd %%xmm1,%%xmm5                   \n"
-    "punpckhwd %%xmm1,%%xmm0                   \n"
-    "movdqu    %%xmm5," MEMACCESS([dst_rgba]) "\n"
-    "movdqu    %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n"
-    "lea       " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
-    "sub       $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [dst_rgba]"+r"(dst_rgba),  // %[dst_rgba]
-    [width]"+rm"(width)    // %[width]
-  : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-#endif  // HAS_I422TOARGBROW_SSSE3
-
-#ifdef HAS_YTOARGBROW_SSE2
-void YToARGBRow_SSE2(const uint8* y_buf,
-                     uint8* dst_argb,
-                     int width) {
-  asm volatile (
-    "pxor      %%xmm5,%%xmm5                   \n"
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "pslld     $0x18,%%xmm4                    \n"
-    "mov       $0x00100010,%%eax               \n"
-    "movd      %%eax,%%xmm3                    \n"
-    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
-    "mov       $0x004a004a,%%eax               \n"
-    "movd      %%eax,%%xmm2                    \n"
-    "pshufd    $0x0,%%xmm2,%%xmm2              \n"
-    LABELALIGN
-  "1:                                          \n"
-    // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "psubusw   %%xmm3,%%xmm0                   \n"
-    "pmullw    %%xmm2,%%xmm0                   \n"
-    "psrlw     $6, %%xmm0                      \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-
-    // Step 2: Weave into ARGB
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm0,%%xmm0                   \n"
-    "punpckhwd %%xmm1,%%xmm1                   \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "por       %%xmm4,%%xmm1                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(y_buf),     // %0
-    "+r"(dst_argb),  // %1
-    "+rm"(width)     // %2
-  :
-  : "memory", "cc", "eax"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
-  );
-}
-#endif  // HAS_YTOARGBROW_SSE2
-
-#ifdef HAS_MIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static uvec8 kShuffleMirror = {
-  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
-  intptr_t temp_width = (intptr_t)(width);
-  asm volatile (
-    "movdqa    %3,%%xmm5                       \n"
-    "lea       " MEMLEA(-0x10,0) ",%0          \n"
-    LABELALIGN
-  "1:                                          \n"
-    MEMOPREG(movdqa,0x00,0,2,1,xmm0)           //  movdqa  (%0,%2),%%xmm0
-    "pshufb    %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(temp_width)  // %2
-  : "m"(kShuffleMirror) // %3
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_MIRRORROW_SSSE3
-
-#ifdef HAS_MIRRORROW_SSE2
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
-  intptr_t temp_width = (intptr_t)(width);
-  asm volatile (
-    "lea       " MEMLEA(-0x10,0) ",%0          \n"
-    LABELALIGN
-  "1:                                          \n"
-    MEMOPREG(movdqu,0x00,0,2,1,xmm0)           //  movdqu  (%0,%2),%%xmm0
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "psllw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm1,%%xmm0                   \n"
-    "pshuflw   $0x1b,%%xmm0,%%xmm0             \n"
-    "pshufhw   $0x1b,%%xmm0,%%xmm0             \n"
-    "pshufd    $0x4e,%%xmm0,%%xmm0             \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1)",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(temp_width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-#endif  // HAS_MIRRORROW_SSE2
-
-#ifdef HAS_MIRRORROW_UV_SSSE3
-// Shuffle table for reversing the bytes of UV channels.
-static uvec8 kShuffleMirrorUV = {
-  14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
-};
-void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
-                       int width) {
-  intptr_t temp_width = (intptr_t)(width);
-  asm volatile (
-    "movdqa    %4,%%xmm1                       \n"
-    "lea       " MEMLEA4(-0x10,0,3,2) ",%0       \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(-0x10,0) ",%0            \n"
-    "pshufb    %%xmm1,%%xmm0                   \n"
-    "sub       $8,%3                           \n"
-    "movlpd    %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movhpd,xmm0,0x00,1,2,1)           //  movhpd    %%xmm0,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src),      // %0
-    "+r"(dst_u),    // %1
-    "+r"(dst_v),    // %2
-    "+r"(temp_width)  // %3
-  : "m"(kShuffleMirrorUV)  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-#endif  // HAS_MIRRORROW_UV_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static uvec8 kARGBShuffleMirror = {
-  12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
-};
-
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
-  intptr_t temp_width = (intptr_t)(width);
-  asm volatile (
-    "lea       " MEMLEA4(-0x10,0,2,4) ",%0     \n"
-    "movdqa    %3,%%xmm5                       \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "pshufb    %%xmm5,%%xmm0                   \n"
-    "lea       " MEMLEA(-0x10,0) ",%0          \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src),  // %0
-    "+r"(dst),  // %1
-    "+r"(temp_width)  // %2
-  : "m"(kARGBShuffleMirror)  // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBMIRRORROW_SSSE3
-
-#ifdef HAS_SPLITUVROW_SSE2
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb    %%xmm5,%%xmm5                    \n"
-    "psrlw      $0x8,%%xmm5                      \n"
-    "sub        %1,%2                            \n"
-    LABELALIGN
-  "1:                                            \n"
-    "movdqa     " MEMACCESS(0) ",%%xmm0          \n"
-    "movdqa     " MEMACCESS2(0x10,0) ",%%xmm1    \n"
-    "lea        " MEMLEA(0x20,0) ",%0            \n"
-    "movdqa     %%xmm0,%%xmm2                    \n"
-    "movdqa     %%xmm1,%%xmm3                    \n"
-    "pand       %%xmm5,%%xmm0                    \n"
-    "pand       %%xmm5,%%xmm1                    \n"
-    "packuswb   %%xmm1,%%xmm0                    \n"
-    "psrlw      $0x8,%%xmm2                      \n"
-    "psrlw      $0x8,%%xmm3                      \n"
-    "packuswb   %%xmm3,%%xmm2                    \n"
-    "movdqa     %%xmm0," MEMACCESS(1) "          \n"
-    MEMOPMEM(movdqa,xmm2,0x00,1,2,1)             // movdqa     %%xmm2,(%1,%2)
-    "lea        " MEMLEA(0x10,1) ",%1            \n"
-    "sub        $0x10,%3                         \n"
-    "jg         1b                               \n"
-  : "+r"(src_uv),     // %0
-    "+r"(dst_u),      // %1
-    "+r"(dst_v),      // %2
-    "+r"(pix)         // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                               int pix) {
-  asm volatile (
-    "pcmpeqb    %%xmm5,%%xmm5                    \n"
-    "psrlw      $0x8,%%xmm5                      \n"
-    "sub        %1,%2                            \n"
-    LABELALIGN
-  "1:                                            \n"
-    "movdqu     " MEMACCESS(0) ",%%xmm0          \n"
-    "movdqu     " MEMACCESS2(0x10,0) ",%%xmm1    \n"
-    "lea        " MEMLEA(0x20,0) ",%0            \n"
-    "movdqa     %%xmm0,%%xmm2                    \n"
-    "movdqa     %%xmm1,%%xmm3                    \n"
-    "pand       %%xmm5,%%xmm0                    \n"
-    "pand       %%xmm5,%%xmm1                    \n"
-    "packuswb   %%xmm1,%%xmm0                    \n"
-    "psrlw      $0x8,%%xmm2                      \n"
-    "psrlw      $0x8,%%xmm3                      \n"
-    "packuswb   %%xmm3,%%xmm2                    \n"
-    "movdqu     %%xmm0," MEMACCESS(1) "          \n"
-    MEMOPMEM(movdqu,xmm2,0x00,1,2,1)             //  movdqu     %%xmm2,(%1,%2)
-    "lea        " MEMLEA(0x10,1) ",%1            \n"
-    "sub        $0x10,%3                         \n"
-    "jg         1b                               \n"
-  : "+r"(src_uv),     // %0
-    "+r"(dst_u),      // %1
-    "+r"(dst_v),      // %2
-    "+r"(pix)         // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_SPLITUVROW_SSE2
-
-#ifdef HAS_MERGEUVROW_SSE2
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                     int width) {
-  asm volatile (
-    "sub       %0,%1                             \n"
-    LABELALIGN
-  "1:                                            \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0           \n"
-    MEMOPREG(movdqa,0x00,0,1,1,xmm1)             //  movdqa    (%0,%1,1),%%xmm1
-    "lea       " MEMLEA(0x10,0) ",%0             \n"
-    "movdqa    %%xmm0,%%xmm2                     \n"
-    "punpcklbw %%xmm1,%%xmm0                     \n"
-    "punpckhbw %%xmm1,%%xmm2                     \n"
-    "movdqa    %%xmm0," MEMACCESS(2) "           \n"
-    "movdqa    %%xmm2," MEMACCESS2(0x10,2) "     \n"
-    "lea       " MEMLEA(0x20,2) ",%2             \n"
-    "sub       $0x10,%3                          \n"
-    "jg        1b                                \n"
-  : "+r"(src_u),     // %0
-    "+r"(src_v),     // %1
-    "+r"(dst_uv),    // %2
-    "+r"(width)      // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2"
-#endif
-  );
-}
-
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
-                               uint8* dst_uv, int width) {
-  asm volatile (
-    "sub       %0,%1                             \n"
-    LABELALIGN
-  "1:                                            \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0           \n"
-    MEMOPREG(movdqu,0x00,0,1,1,xmm1)             //  movdqu    (%0,%1,1),%%xmm1
-    "lea       " MEMLEA(0x10,0) ",%0             \n"
-    "movdqa    %%xmm0,%%xmm2                     \n"
-    "punpcklbw %%xmm1,%%xmm0                     \n"
-    "punpckhbw %%xmm1,%%xmm2                     \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "           \n"
-    "movdqu    %%xmm2," MEMACCESS2(0x10,2) "     \n"
-    "lea       " MEMLEA(0x20,2) ",%2             \n"
-    "sub       $0x10,%3                          \n"
-    "jg        1b                                \n"
-  : "+r"(src_u),     // %0
-    "+r"(src_v),     // %1
-    "+r"(dst_uv),    // %2
-    "+r"(width)      // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2"
-#endif
-  );
-}
-#endif  // HAS_MERGEUVROW_SSE2
-
-#ifdef HAS_COPYROW_SSE2
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "sub       $0x20,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(count)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-#endif  // HAS_COPYROW_SSE2
-
-#ifdef HAS_COPYROW_X86
-void CopyRow_X86(const uint8* src, uint8* dst, int width) {
-  size_t width_tmp = (size_t)(width);
-  asm volatile (
-    "shr       $0x2,%2                         \n"
-    "rep movsl " MEMMOVESTRING(0,1) "          \n"
-  : "+S"(src),  // %0
-    "+D"(dst),  // %1
-    "+c"(width_tmp) // %2
-  :
-  : "memory", "cc"
-  );
-}
-#endif  // HAS_COPYROW_X86
-
-#ifdef HAS_COPYROW_ERMS
-// Unaligned Multiple of 1.
-void CopyRow_ERMS(const uint8* src, uint8* dst, int width) {
-  size_t width_tmp = (size_t)(width);
-  asm volatile (
-    "rep movsb " MEMMOVESTRING(0,1) "          \n"
-  : "+S"(src),  // %0
-    "+D"(dst),  // %1
-    "+c"(width_tmp) // %2
-  :
-  : "memory", "cc"
-  );
-}
-#endif  // HAS_COPYROW_ERMS
-
-#ifdef HAS_ARGBCOPYALPHAROW_SSE2
-// width in pixels
-void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm0,%%xmm0                   \n"
-    "pslld     $0x18,%%xmm0                    \n"
-    "pcmpeqb   %%xmm1,%%xmm1                   \n"
-    "psrld     $0x8,%%xmm1                     \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm3   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm4         \n"
-    "movdqa    " MEMACCESS2(0x10,1) ",%%xmm5   \n"
-    "pand      %%xmm0,%%xmm2                   \n"
-    "pand      %%xmm0,%%xmm3                   \n"
-    "pand      %%xmm1,%%xmm4                   \n"
-    "pand      %%xmm1,%%xmm5                   \n"
-    "por       %%xmm4,%%xmm2                   \n"
-    "por       %%xmm5,%%xmm3                   \n"
-    "movdqa    %%xmm2," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm3," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBCOPYALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYALPHAROW_AVX2
-// width in pixels
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
-  asm volatile (
-    "vpcmpeqb  %%ymm0,%%ymm0,%%ymm0            \n"
-    "vpsrld    $0x8,%%ymm0,%%ymm0              \n"
-    LABELALIGN
-  "1:                                          \n"
-    "vmovdqu   " MEMACCESS(0) ",%%ymm1         \n"
-    "vmovdqu   " MEMACCESS2(0x20,0) ",%%ymm2   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1        \n"
-    "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2  \n"
-    "vmovdqu   %%ymm1," MEMACCESS(1) "         \n"
-    "vmovdqu   %%ymm2," MEMACCESS2(0x20,1) "   \n"
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-    "vzeroupper                                \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2"
-#endif
-  );
-}
-#endif  // HAS_ARGBCOPYALPHAROW_AVX2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-// width in pixels
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm0,%%xmm0                   \n"
-    "pslld     $0x18,%%xmm0                    \n"
-    "pcmpeqb   %%xmm1,%%xmm1                   \n"
-    "psrld     $0x8,%%xmm1                     \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "punpcklbw %%xmm2,%%xmm2                   \n"
-    "punpckhwd %%xmm2,%%xmm3                   \n"
-    "punpcklwd %%xmm2,%%xmm2                   \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm4         \n"
-    "movdqa    " MEMACCESS2(0x10,1) ",%%xmm5   \n"
-    "pand      %%xmm0,%%xmm2                   \n"
-    "pand      %%xmm0,%%xmm3                   \n"
-    "pand      %%xmm1,%%xmm4                   \n"
-    "pand      %%xmm1,%%xmm5                   \n"
-    "por       %%xmm4,%%xmm2                   \n"
-    "por       %%xmm5,%%xmm3                   \n"
-    "movdqa    %%xmm2," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm3," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBCOPYYTOALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
-// width in pixels
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
-  asm volatile (
-    "vpcmpeqb  %%ymm0,%%ymm0,%%ymm0            \n"
-    "vpsrld    $0x8,%%ymm0,%%ymm0              \n"
-    LABELALIGN
-  "1:                                          \n"
-    "vpmovzxbd " MEMACCESS(0) ",%%ymm1         \n"
-    "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2    \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "vpslld    $0x18,%%ymm1,%%ymm1             \n"
-    "vpslld    $0x18,%%ymm2,%%ymm2             \n"
-    "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1        \n"
-    "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2  \n"
-    "vmovdqu   %%ymm1," MEMACCESS(1) "         \n"
-    "vmovdqu   %%ymm2," MEMACCESS2(0x20,1) "   \n"
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-    "vzeroupper                                \n"
-  : "+r"(src),   // %0
-    "+r"(dst),   // %1
-    "+r"(width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2"
-#endif
-  );
-}
-#endif  // HAS_ARGBCOPYYTOALPHAROW_AVX2
-
-#ifdef HAS_SETROW_X86
-void SetRow_X86(uint8* dst, uint32 v32, int width) {
-  size_t width_tmp = (size_t)(width);
-  asm volatile (
-    "shr       $0x2,%1                         \n"
-    "rep stosl " MEMSTORESTRING(eax,0) "       \n"
-    : "+D"(dst),       // %0
-      "+c"(width_tmp)  // %1
-    : "a"(v32)         // %2
-    : "memory", "cc");
-}
-
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
-                   int dst_stride, int height) {
-  for (int y = 0; y < height; ++y) {
-    size_t width_tmp = (size_t)(width);
-    uint32* d = (uint32*)(dst);
-    asm volatile (
-      "rep stosl " MEMSTORESTRING(eax,0) "     \n"
-      : "+D"(d),         // %0
-        "+c"(width_tmp)  // %1
-      : "a"(v32)         // %2
-      : "memory", "cc");
-    dst += dst_stride;
-  }
-}
-#endif  // HAS_SETROW_X86
-
-#ifdef HAS_YUY2TOYROW_SSE2
-void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_yuy2),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqa,0x00,0,4,1,xmm2)           //  movdqa  (%0,%4,1),%%xmm2
-    MEMOPREG(movdqa,0x10,0,4,1,xmm3)           //  movdqa  0x10(%0,%4,1),%%xmm3
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_yuy2),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  : "r"((intptr_t)(stride_yuy2))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_yuy2),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
-                               uint8* dst_y, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_yuy2),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
-                                int stride_yuy2,
-                                uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x00,0,4,1,xmm2)           //  movdqu  (%0,%4,1),%%xmm2
-    MEMOPREG(movdqu,0x10,0,4,1,xmm3)           //  movdqu  0x10(%0,%4,1),%%xmm3
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_yuy2),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  : "r"((intptr_t)(stride_yuy2))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
-                                   uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_yuy2),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_uyvy),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqa,0x00,0,4,1,xmm2)           //  movdqa  (%0,%4,1),%%xmm2
-    MEMOPREG(movdqa,0x10,0,4,1,xmm3)           //  movdqa  0x10(%0,%4,1),%%xmm3
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_uyvy),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  : "r"((intptr_t)(stride_uyvy))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_uyvy),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
-                               uint8* dst_y, int pix) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_uyvy),  // %0
-    "+r"(dst_y),     // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
-                                uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x00,0,4,1,xmm2)           //  movdqu  (%0,%4,1),%%xmm2
-    MEMOPREG(movdqu,0x10,0,4,1,xmm3)           //  movdqu  0x10(%0,%4,1),%%xmm3
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_uyvy),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  : "r"((intptr_t)(stride_uyvy))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
-                                   uint8* dst_u, uint8* dst_v, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-    "sub       %1,%2                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm1,0x00,1,2,1)             //  movq    %%xmm1,(%1,%2)
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x10,%3                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_uyvy),    // %0
-    "+r"(dst_u),       // %1
-    "+r"(dst_v),       // %2
-    "+r"(pix)          // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_YUY2TOYROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSE2
-// Blend 8 pixels at a time.
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                       uint8* dst_argb, int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm7,%%xmm7                   \n"
-    "psrlw     $0xf,%%xmm7                     \n"
-    "pcmpeqb   %%xmm6,%%xmm6                   \n"
-    "psrlw     $0x8,%%xmm6                     \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psllw     $0x8,%%xmm5                     \n"
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "pslld     $0x18,%%xmm4                    \n"
-    "sub       $0x1,%3                         \n"
-    "je        91f                             \n"
-    "jl        99f                             \n"
-
-    // 1 pixel loop until destination pointer is aligned.
-  "10:                                         \n"
-    "test      $0xf,%2                         \n"
-    "je        19f                             \n"
-    "movd      " MEMACCESS(0) ",%%xmm3         \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    "movdqa    %%xmm3,%%xmm0                   \n"
-    "pxor      %%xmm4,%%xmm3                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm2         \n"
-    "psrlw     $0x8,%%xmm3                     \n"
-    "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
-    "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
-    "pand      %%xmm6,%%xmm2                   \n"
-    "paddw     %%xmm7,%%xmm3                   \n"
-    "pmullw    %%xmm3,%%xmm2                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x4,1) ",%1            \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm2                     \n"
-    "paddusb   %%xmm2,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x1,%3                         \n"
-    "movd      %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x4,2) ",%2            \n"
-    "jge       10b                             \n"
-
-  "19:                                         \n"
-    "add       $1-4,%3                         \n"
-    "jl        49f                             \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "41:                                         \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm3         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm3,%%xmm0                   \n"
-    "pxor      %%xmm4,%%xmm3                   \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
-    "psrlw     $0x8,%%xmm3                     \n"
-    "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
-    "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
-    "pand      %%xmm6,%%xmm2                   \n"
-    "paddw     %%xmm7,%%xmm3                   \n"
-    "pmullw    %%xmm3,%%xmm2                   \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm2                     \n"
-    "paddusb   %%xmm2,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jge       41b                             \n"
-
-  "49:                                         \n"
-    "add       $0x3,%3                         \n"
-    "jl        99f                             \n"
-
-    // 1 pixel loop.
-  "91:                                         \n"
-    "movd      " MEMACCESS(0) ",%%xmm3         \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    "movdqa    %%xmm3,%%xmm0                   \n"
-    "pxor      %%xmm4,%%xmm3                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm2         \n"
-    "psrlw     $0x8,%%xmm3                     \n"
-    "pshufhw   $0xf5,%%xmm3,%%xmm3             \n"
-    "pshuflw   $0xf5,%%xmm3,%%xmm3             \n"
-    "pand      %%xmm6,%%xmm2                   \n"
-    "paddw     %%xmm7,%%xmm3                   \n"
-    "pmullw    %%xmm3,%%xmm2                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x4,1) ",%1            \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm2                     \n"
-    "paddusb   %%xmm2,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x1,%3                         \n"
-    "movd      %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x4,2) ",%2            \n"
-    "jge       91b                             \n"
-  "99:                                         \n"
-  : "+r"(src_argb0),    // %0
-    "+r"(src_argb1),    // %1
-    "+r"(dst_argb),     // %2
-    "+r"(width)         // %3
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_ARGBBLENDROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSSE3
-// Shuffle table for isolating alpha.
-static uvec8 kShuffleAlpha = {
-  3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
-  11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
-};
-
-// Blend 8 pixels at a time
-// Shuffle table for reversing the bytes.
-
-// Same as SSE2, but replaces
-//    psrlw      xmm3, 8          // alpha
-//    pshufhw    xmm3, xmm3,0F5h  // 8 alpha words
-//    pshuflw    xmm3, xmm3,0F5h
-// with..
-//    pshufb     xmm3, kShuffleAlpha // alpha
-
-void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
-                        uint8* dst_argb, int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm7,%%xmm7                   \n"
-    "psrlw     $0xf,%%xmm7                     \n"
-    "pcmpeqb   %%xmm6,%%xmm6                   \n"
-    "psrlw     $0x8,%%xmm6                     \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psllw     $0x8,%%xmm5                     \n"
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "pslld     $0x18,%%xmm4                    \n"
-    "sub       $0x1,%3                         \n"
-    "je        91f                             \n"
-    "jl        99f                             \n"
-
-    // 1 pixel loop until destination pointer is aligned.
-  "10:                                         \n"
-    "test      $0xf,%2                         \n"
-    "je        19f                             \n"
-    "movd      " MEMACCESS(0) ",%%xmm3         \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    "movdqa    %%xmm3,%%xmm0                   \n"
-    "pxor      %%xmm4,%%xmm3                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm2         \n"
-    "pshufb    %4,%%xmm3                       \n"
-    "pand      %%xmm6,%%xmm2                   \n"
-    "paddw     %%xmm7,%%xmm3                   \n"
-    "pmullw    %%xmm3,%%xmm2                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x4,1) ",%1            \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm2                     \n"
-    "paddusb   %%xmm2,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x1,%3                         \n"
-    "movd      %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x4,2) ",%2            \n"
-    "jge       10b                             \n"
-
-  "19:                                         \n"
-    "add       $1-4,%3                         \n"
-    "jl        49f                             \n"
-    "test      $0xf,%0                         \n"
-    "jne       41f                             \n"
-    "test      $0xf,%1                         \n"
-    "jne       41f                             \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "40:                                         \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm3         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm3,%%xmm0                   \n"
-    "pxor      %%xmm4,%%xmm3                   \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm2         \n"
-    "pshufb    %4,%%xmm3                       \n"
-    "pand      %%xmm6,%%xmm2                   \n"
-    "paddw     %%xmm7,%%xmm3                   \n"
-    "pmullw    %%xmm3,%%xmm2                   \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm2                     \n"
-    "paddusb   %%xmm2,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jge       40b                             \n"
-    "jmp       49f                             \n"
-
-    // 4 pixel unaligned loop.
-    LABELALIGN
-  "41:                                         \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm3         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm3,%%xmm0                   \n"
-    "pxor      %%xmm4,%%xmm3                   \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
-    "pshufb    %4,%%xmm3                       \n"
-    "pand      %%xmm6,%%xmm2                   \n"
-    "paddw     %%xmm7,%%xmm3                   \n"
-    "pmullw    %%xmm3,%%xmm2                   \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm2                     \n"
-    "paddusb   %%xmm2,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jge       41b                             \n"
-
-  "49:                                         \n"
-    "add       $0x3,%3                         \n"
-    "jl        99f                             \n"
-
-    // 1 pixel loop.
-  "91:                                         \n"
-    "movd      " MEMACCESS(0) ",%%xmm3         \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    "movdqa    %%xmm3,%%xmm0                   \n"
-    "pxor      %%xmm4,%%xmm3                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm2         \n"
-    "pshufb    %4,%%xmm3                       \n"
-    "pand      %%xmm6,%%xmm2                   \n"
-    "paddw     %%xmm7,%%xmm3                   \n"
-    "pmullw    %%xmm3,%%xmm2                   \n"
-    "movd      " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x4,1) ",%1            \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "por       %%xmm4,%%xmm0                   \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm2                     \n"
-    "paddusb   %%xmm2,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x1,%3                         \n"
-    "movd      %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x4,2) ",%2            \n"
-    "jge       91b                             \n"
-  "99:                                         \n"
-  : "+r"(src_argb0),    // %0
-    "+r"(src_argb1),    // %1
-    "+r"(dst_argb),     // %2
-    "+r"(width)         // %3
-  : "m"(kShuffleAlpha)  // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_ARGBBLENDROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time.
-// aligned to 16 bytes
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "pslld     $0x18,%%xmm4                    \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrld     $0x8,%%xmm5                     \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "pshufhw   $0xff,%%xmm0,%%xmm2             \n"
-    "pshuflw   $0xff,%%xmm2,%%xmm2             \n"
-    "pmulhuw   %%xmm2,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
-    "punpckhbw %%xmm1,%%xmm1                   \n"
-    "pshufhw   $0xff,%%xmm1,%%xmm2             \n"
-    "pshuflw   $0xff,%%xmm2,%%xmm2             \n"
-    "pmulhuw   %%xmm2,%%xmm1                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "pand      %%xmm4,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "por       %%xmm2,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+r"(width)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-// Shuffle table duplicating alpha
-static uvec8 kShuffleAlpha0 = {
-  3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
-};
-static uvec8 kShuffleAlpha1 = {
-  11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
-  15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
-};
-// Attenuate 4 pixels at a time.
-// aligned to 16 bytes
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
-  asm volatile (
-    "pcmpeqb   %%xmm3,%%xmm3                   \n"
-    "pslld     $0x18,%%xmm3                    \n"
-    "movdqa    %3,%%xmm4                       \n"
-    "movdqa    %4,%%xmm5                       \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "pshufb    %%xmm4,%%xmm0                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
-    "punpcklbw %%xmm1,%%xmm1                   \n"
-    "pmulhuw   %%xmm1,%%xmm0                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
-    "pshufb    %%xmm5,%%xmm1                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm2         \n"
-    "punpckhbw %%xmm2,%%xmm2                   \n"
-    "pmulhuw   %%xmm2,%%xmm1                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "pand      %%xmm3,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "por       %%xmm2,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+r"(width)        // %2
-  : "m"(kShuffleAlpha0),  // %3
-    "m"(kShuffleAlpha1)  // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBATTENUATEROW_SSSE3
-
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-// Unattenuate 4 pixels at a time.
-// aligned to 16 bytes
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
-                             int width) {
-  uintptr_t alpha = 0;
-  asm volatile (
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movzb     " MEMACCESS2(0x03,0) ",%3       \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    MEMOPREG(movd,0x00,4,3,4,xmm2)             //  movd      0x0(%4,%3,4),%%xmm2
-    "movzb     " MEMACCESS2(0x07,0) ",%3       \n"
-    MEMOPREG(movd,0x00,4,3,4,xmm3)             //  movd      0x0(%4,%3,4),%%xmm3
-    "pshuflw   $0x40,%%xmm2,%%xmm2             \n"
-    "pshuflw   $0x40,%%xmm3,%%xmm3             \n"
-    "movlhps   %%xmm3,%%xmm2                   \n"
-    "pmulhuw   %%xmm2,%%xmm0                   \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
-    "movzb     " MEMACCESS2(0x0b,0) ",%3       \n"
-    "punpckhbw %%xmm1,%%xmm1                   \n"
-    BUNDLEALIGN
-    MEMOPREG(movd,0x00,4,3,4,xmm2)             //  movd      0x0(%4,%3,4),%%xmm2
-    "movzb     " MEMACCESS2(0x0f,0) ",%3       \n"
-    MEMOPREG(movd,0x00,4,3,4,xmm3)             //  movd      0x0(%4,%3,4),%%xmm3
-    "pshuflw   $0x40,%%xmm2,%%xmm2             \n"
-    "pshuflw   $0x40,%%xmm3,%%xmm3             \n"
-    "movlhps   %%xmm3,%%xmm2                   \n"
-    "pmulhuw   %%xmm2,%%xmm1                   \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+r"(width),       // %2
-    "+r"(alpha)        // %3
-  : "r"(fixed_invtbl8)  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBUNATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBGRAYROW_SSSE3
-// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
-  asm volatile (
-    "movdqa    %3,%%xmm4                       \n"
-    "movdqa    %4,%%xmm5                       \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "pmaddubsw %%xmm4,%%xmm0                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "phaddw    %%xmm1,%%xmm0                   \n"
-    "paddw     %%xmm5,%%xmm0                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm3   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrld     $0x18,%%xmm2                    \n"
-    "psrld     $0x18,%%xmm3                    \n"
-    "packuswb  %%xmm3,%%xmm2                   \n"
-    "packuswb  %%xmm2,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm3                   \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "punpcklbw %%xmm2,%%xmm3                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm3,%%xmm0                   \n"
-    "punpckhwd %%xmm3,%%xmm1                   \n"
-    "sub       $0x8,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),   // %0
-    "+r"(dst_argb),   // %1
-    "+r"(width)       // %2
-  : "m"(kARGBToYJ),   // %3
-    "m"(kAddYJ64)     // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBGRAYROW_SSSE3
-
-#ifdef HAS_ARGBSEPIAROW_SSSE3
-//    b = (r * 35 + g * 68 + b * 17) >> 7
-//    g = (r * 45 + g * 88 + b * 22) >> 7
-//    r = (r * 50 + g * 98 + b * 24) >> 7
-// Constant for ARGB color to sepia tone
-static vec8 kARGBToSepiaB = {
-  17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
-};
-
-static vec8 kARGBToSepiaG = {
-  22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
-};
-
-static vec8 kARGBToSepiaR = {
-  24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
-};
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
-  asm volatile (
-    "movdqa    %2,%%xmm2                       \n"
-    "movdqa    %3,%%xmm3                       \n"
-    "movdqa    %4,%%xmm4                       \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
-    "pmaddubsw %%xmm2,%%xmm0                   \n"
-    "pmaddubsw %%xmm2,%%xmm6                   \n"
-    "phaddw    %%xmm6,%%xmm0                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm5         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "pmaddubsw %%xmm3,%%xmm5                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "phaddw    %%xmm1,%%xmm5                   \n"
-    "psrlw     $0x7,%%xmm5                     \n"
-    "packuswb  %%xmm5,%%xmm5                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm5         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "pmaddubsw %%xmm4,%%xmm5                   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "phaddw    %%xmm1,%%xmm5                   \n"
-    "psrlw     $0x7,%%xmm5                     \n"
-    "packuswb  %%xmm5,%%xmm5                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "psrld     $0x18,%%xmm6                    \n"
-    "psrld     $0x18,%%xmm1                    \n"
-    "packuswb  %%xmm1,%%xmm6                   \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "punpcklbw %%xmm6,%%xmm5                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklwd %%xmm5,%%xmm0                   \n"
-    "punpckhwd %%xmm5,%%xmm1                   \n"
-    "sub       $0x8,%1                         \n"
-    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "jg        1b                              \n"
-  : "+r"(dst_argb),      // %0
-    "+r"(width)          // %1
-  : "m"(kARGBToSepiaB),  // %2
-    "m"(kARGBToSepiaG),  // %3
-    "m"(kARGBToSepiaR)   // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-#endif  // HAS_ARGBSEPIAROW_SSSE3
-
-#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
-// Tranform 8 ARGB pixels (32 bytes) with color matrix.
-// Same as Sepia except matrix is provided.
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                              const int8* matrix_argb, int width) {
-  asm volatile (
-    "movdqu    " MEMACCESS(3) ",%%xmm5         \n"
-    "pshufd    $0x00,%%xmm5,%%xmm2             \n"
-    "pshufd    $0x55,%%xmm5,%%xmm3             \n"
-    "pshufd    $0xaa,%%xmm5,%%xmm4             \n"
-    "pshufd    $0xff,%%xmm5,%%xmm5             \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm7   \n"
-    "pmaddubsw %%xmm2,%%xmm0                   \n"
-    "pmaddubsw %%xmm2,%%xmm7                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "pmaddubsw %%xmm3,%%xmm6                   \n"
-    "pmaddubsw %%xmm3,%%xmm1                   \n"
-    "phaddsw   %%xmm7,%%xmm0                   \n"
-    "phaddsw   %%xmm1,%%xmm6                   \n"
-    "psraw     $0x6,%%xmm0                     \n"
-    "psraw     $0x6,%%xmm6                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "punpcklbw %%xmm6,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm7   \n"
-    "pmaddubsw %%xmm4,%%xmm1                   \n"
-    "pmaddubsw %%xmm4,%%xmm7                   \n"
-    "phaddsw   %%xmm7,%%xmm1                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm7   \n"
-    "pmaddubsw %%xmm5,%%xmm6                   \n"
-    "pmaddubsw %%xmm5,%%xmm7                   \n"
-    "phaddsw   %%xmm7,%%xmm6                   \n"
-    "psraw     $0x6,%%xmm1                     \n"
-    "psraw     $0x6,%%xmm6                     \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "punpcklbw %%xmm6,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm6                   \n"
-    "punpcklwd %%xmm1,%%xmm0                   \n"
-    "punpckhwd %%xmm1,%%xmm6                   \n"
-    "sub       $0x8,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm6," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),      // %0
-    "+r"(dst_argb),      // %1
-    "+r"(width)          // %2
-  : "r"(matrix_argb)     // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_ARGBCOLORMATRIXROW_SSSE3
-
-#ifdef HAS_ARGBQUANTIZEROW_SSE2
-// Quantize 4 ARGB pixels (16 bytes).
-// aligned to 16 bytes
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
-                          int interval_offset, int width) {
-  asm volatile (
-    "movd      %2,%%xmm2                       \n"
-    "movd      %3,%%xmm3                       \n"
-    "movd      %4,%%xmm4                       \n"
-    "pshuflw   $0x40,%%xmm2,%%xmm2             \n"
-    "pshufd    $0x44,%%xmm2,%%xmm2             \n"
-    "pshuflw   $0x40,%%xmm3,%%xmm3             \n"
-    "pshufd    $0x44,%%xmm3,%%xmm3             \n"
-    "pshuflw   $0x40,%%xmm4,%%xmm4             \n"
-    "pshufd    $0x44,%%xmm4,%%xmm4             \n"
-    "pxor      %%xmm5,%%xmm5                   \n"
-    "pcmpeqb   %%xmm6,%%xmm6                   \n"
-    "pslld     $0x18,%%xmm6                    \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "pmulhuw   %%xmm2,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
-    "punpckhbw %%xmm5,%%xmm1                   \n"
-    "pmulhuw   %%xmm2,%%xmm1                   \n"
-    "pmullw    %%xmm3,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm7         \n"
-    "pmullw    %%xmm3,%%xmm1                   \n"
-    "pand      %%xmm6,%%xmm7                   \n"
-    "paddw     %%xmm4,%%xmm0                   \n"
-    "paddw     %%xmm4,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "por       %%xmm7,%%xmm0                   \n"
-    "sub       $0x4,%1                         \n"
-    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "jg        1b                              \n"
-  : "+r"(dst_argb),       // %0
-    "+r"(width)           // %1
-  : "r"(scale),           // %2
-    "r"(interval_size),   // %3
-    "r"(interval_offset)  // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_ARGBQUANTIZEROW_SSE2
-
-#ifdef HAS_ARGBSHADEROW_SSE2
-// Shade 4 pixels at a time by specified value.
-// Aligned to 16 bytes.
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
-                       uint32 value) {
-  asm volatile (
-    "movd      %3,%%xmm2                       \n"
-    "punpcklbw %%xmm2,%%xmm2                   \n"
-    "punpcklqdq %%xmm2,%%xmm2                  \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "punpckhbw %%xmm1,%%xmm1                   \n"
-    "pmulhuw   %%xmm2,%%xmm0                   \n"
-    "pmulhuw   %%xmm2,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(width)      // %2
-  : "r"(value)       // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2"
-#endif
-  );
-}
-#endif  // HAS_ARGBSHADEROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  asm volatile (
-    "pxor      %%xmm5,%%xmm5                   \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "movdqu    %%xmm0,%%xmm1                   \n"
-    "movdqu    %%xmm2,%%xmm3                   \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "punpckhbw %%xmm1,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "punpckhbw %%xmm5,%%xmm3                   \n"
-    "pmulhuw   %%xmm2,%%xmm0                   \n"
-    "pmulhuw   %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBMULTIPLYROW_SSE2
-
-#ifdef HAS_ARGBADDROW_SSE2
-// Add 2 rows of ARGB pixels together, 4 pixels at a time.
-void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                     uint8* dst_argb, int width) {
-  asm volatile (
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-#endif  // HAS_ARGBADDROW_SSE2
-
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-// Subtract 2 rows of ARGB pixels, 4 pixels at a time.
-void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  asm volatile (
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "psubusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb0),  // %0
-    "+r"(src_argb1),  // %1
-    "+r"(dst_argb),   // %2
-    "+r"(width)       // %3
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-#endif  // HAS_ARGBSUBTRACTROW_SSE2
-
-#ifdef HAS_SOBELXROW_SSE2
-// SobelX as a matrix is
-// -1  0  1
-// -2  0  2
-// -1  0  1
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobelx, int width) {
-  asm volatile (
-    "sub       %0,%1                           \n"
-    "sub       %0,%2                           \n"
-    "sub       %0,%3                           \n"
-    "pxor      %%xmm5,%%xmm5                   \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    "movq      " MEMACCESS2(0x2,0) ",%%xmm1    \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm1                   \n"
-    "psubw     %%xmm1,%%xmm0                   \n"
-    BUNDLEALIGN
-    MEMOPREG(movq,0x00,0,1,1,xmm1)             //  movq      (%0,%1,1),%%xmm1
-    MEMOPREG(movq,0x02,0,1,1,xmm2)             //  movq      0x2(%0,%1,1),%%xmm2
-    "punpcklbw %%xmm5,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "psubw     %%xmm2,%%xmm1                   \n"
-    BUNDLEALIGN
-    MEMOPREG(movq,0x00,0,2,1,xmm2)             //  movq      (%0,%2,1),%%xmm2
-    MEMOPREG(movq,0x02,0,2,1,xmm3)             //  movq      0x2(%0,%2,1),%%xmm3
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "punpcklbw %%xmm5,%%xmm3                   \n"
-    "psubw     %%xmm3,%%xmm2                   \n"
-    "paddw     %%xmm2,%%xmm0                   \n"
-    "paddw     %%xmm1,%%xmm0                   \n"
-    "paddw     %%xmm1,%%xmm0                   \n"
-    "pxor      %%xmm1,%%xmm1                   \n"
-    "psubw     %%xmm0,%%xmm1                   \n"
-    "pmaxsw    %%xmm1,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "sub       $0x8,%4                         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm0,0x00,0,3,1)             //  movq      %%xmm0,(%0,%3,1)
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "jg        1b                              \n"
-  : "+r"(src_y0),      // %0
-    "+r"(src_y1),      // %1
-    "+r"(src_y2),      // %2
-    "+r"(dst_sobelx),  // %3
-    "+r"(width)        // %4
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_SOBELXROW_SSE2
-
-#ifdef HAS_SOBELYROW_SSE2
-// SobelY as a matrix is
-// -1 -2 -1
-//  0  0  0
-//  1  2  1
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width) {
-  asm volatile (
-    "sub       %0,%1                           \n"
-    "sub       %0,%2                           \n"
-    "pxor      %%xmm5,%%xmm5                   \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(movq,0x00,0,1,1,xmm1)             //  movq      (%0,%1,1),%%xmm1
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm1                   \n"
-    "psubw     %%xmm1,%%xmm0                   \n"
-    BUNDLEALIGN
-    "movq      " MEMACCESS2(0x1,0) ",%%xmm1    \n"
-    MEMOPREG(movq,0x01,0,1,1,xmm2)             //  movq      0x1(%0,%1,1),%%xmm2
-    "punpcklbw %%xmm5,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "psubw     %%xmm2,%%xmm1                   \n"
-    BUNDLEALIGN
-    "movq      " MEMACCESS2(0x2,0) ",%%xmm2    \n"
-    MEMOPREG(movq,0x02,0,1,1,xmm3)             //  movq      0x2(%0,%1,1),%%xmm3
-    "punpcklbw %%xmm5,%%xmm2                   \n"
-    "punpcklbw %%xmm5,%%xmm3                   \n"
-    "psubw     %%xmm3,%%xmm2                   \n"
-    "paddw     %%xmm2,%%xmm0                   \n"
-    "paddw     %%xmm1,%%xmm0                   \n"
-    "paddw     %%xmm1,%%xmm0                   \n"
-    "pxor      %%xmm1,%%xmm1                   \n"
-    "psubw     %%xmm0,%%xmm1                   \n"
-    "pmaxsw    %%xmm1,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "sub       $0x8,%3                         \n"
-    BUNDLEALIGN
-    MEMOPMEM(movq,xmm0,0x00,0,2,1)             //  movq      %%xmm0,(%0,%2,1)
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "jg        1b                              \n"
-  : "+r"(src_y0),      // %0
-    "+r"(src_y1),      // %1
-    "+r"(dst_sobely),  // %2
-    "+r"(width)        // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_SOBELYROW_SSE2
-
-#ifdef HAS_SOBELROW_SSE2
-// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                   uint8* dst_argb, int width) {
-  asm volatile (
-    "sub       %0,%1                           \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pslld     $0x18,%%xmm5                    \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,0,1,1,xmm1)           //  movdqa    (%0,%1,1),%%xmm1
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "punpcklbw %%xmm0,%%xmm2                   \n"
-    "punpckhbw %%xmm0,%%xmm0                   \n"
-    "movdqa    %%xmm2,%%xmm1                   \n"
-    "punpcklwd %%xmm2,%%xmm1                   \n"
-    "punpckhwd %%xmm2,%%xmm2                   \n"
-    "por       %%xmm5,%%xmm1                   \n"
-    "por       %%xmm5,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm3                   \n"
-    "punpcklwd %%xmm0,%%xmm3                   \n"
-    "punpckhwd %%xmm0,%%xmm0                   \n"
-    "por       %%xmm5,%%xmm3                   \n"
-    "por       %%xmm5,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movdqa    %%xmm1," MEMACCESS(2) "         \n"
-    "movdqa    %%xmm2," MEMACCESS2(0x10,2) "   \n"
-    "movdqa    %%xmm3," MEMACCESS2(0x20,2) "   \n"
-    "movdqa    %%xmm0," MEMACCESS2(0x30,2) "   \n"
-    "lea       " MEMLEA(0x40,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_sobelx),  // %0
-    "+r"(src_sobely),  // %1
-    "+r"(dst_argb),    // %2
-    "+r"(width)        // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_SOBELROW_SSE2
-
-#ifdef HAS_SOBELTOPLANEROW_SSE2
-// Adds Sobel X and Sobel Y and stores Sobel into a plane.
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                          uint8* dst_y, int width) {
-  asm volatile (
-    "sub       %0,%1                           \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "pslld     $0x18,%%xmm5                    \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,0,1,1,xmm1)           //  movdqa    (%0,%1,1),%%xmm1
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%3                        \n"
-    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_sobelx),  // %0
-    "+r"(src_sobely),  // %1
-    "+r"(dst_y),       // %2
-    "+r"(width)        // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-#endif  // HAS_SOBELTOPLANEROW_SSE2
-
-#ifdef HAS_SOBELXYROW_SSE2
-// Mixes Sobel X, Sobel Y and Sobel into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                     uint8* dst_argb, int width) {
-  asm volatile (
-    "sub       %0,%1                           \n"
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-
-    // 8 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,0,1,1,xmm1)           //  movdqa    (%0,%1,1),%%xmm1
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "paddusb   %%xmm1,%%xmm2                   \n"
-    "movdqa    %%xmm0,%%xmm3                   \n"
-    "punpcklbw %%xmm5,%%xmm3                   \n"
-    "punpckhbw %%xmm5,%%xmm0                   \n"
-    "movdqa    %%xmm1,%%xmm4                   \n"
-    "punpcklbw %%xmm2,%%xmm4                   \n"
-    "punpckhbw %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm4,%%xmm6                   \n"
-    "punpcklwd %%xmm3,%%xmm6                   \n"
-    "punpckhwd %%xmm3,%%xmm4                   \n"
-    "movdqa    %%xmm1,%%xmm7                   \n"
-    "punpcklwd %%xmm0,%%xmm7                   \n"
-    "punpckhwd %%xmm0,%%xmm1                   \n"
-    "sub       $0x10,%3                        \n"
-    "movdqa    %%xmm6," MEMACCESS(2) "         \n"
-    "movdqa    %%xmm4," MEMACCESS2(0x10,2) "   \n"
-    "movdqa    %%xmm7," MEMACCESS2(0x20,2) "   \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x30,2) "   \n"
-    "lea       " MEMLEA(0x40,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_sobelx),  // %0
-    "+r"(src_sobely),  // %1
-    "+r"(dst_argb),    // %2
-    "+r"(width)        // %3
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_SOBELXYROW_SSE2
-
-#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
-// Creates a table of cumulative sums where each value is a sum of all values
-// above and to the left of the value, inclusive of the value.
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
-                                  const int32* previous_cumsum, int width) {
-  asm volatile (
-    "pxor      %%xmm0,%%xmm0                   \n"
-    "pxor      %%xmm1,%%xmm1                   \n"
-    "sub       $0x4,%3                         \n"
-    "jl        49f                             \n"
-    "test      $0xf,%1                         \n"
-    "jne       49f                             \n"
-
-  // 4 pixel loop                              \n"
-    LABELALIGN
-  "40:                                         \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm2,%%xmm4                   \n"
-    "punpcklbw %%xmm1,%%xmm2                   \n"
-    "movdqa    %%xmm2,%%xmm3                   \n"
-    "punpcklwd %%xmm1,%%xmm2                   \n"
-    "punpckhwd %%xmm1,%%xmm3                   \n"
-    "punpckhbw %%xmm1,%%xmm4                   \n"
-    "movdqa    %%xmm4,%%xmm5                   \n"
-    "punpcklwd %%xmm1,%%xmm4                   \n"
-    "punpckhwd %%xmm1,%%xmm5                   \n"
-    "paddd     %%xmm2,%%xmm0                   \n"
-    "movdqa    " MEMACCESS(2) ",%%xmm2         \n"
-    "paddd     %%xmm0,%%xmm2                   \n"
-    "paddd     %%xmm3,%%xmm0                   \n"
-    "movdqa    " MEMACCESS2(0x10,2) ",%%xmm3   \n"
-    "paddd     %%xmm0,%%xmm3                   \n"
-    "paddd     %%xmm4,%%xmm0                   \n"
-    "movdqa    " MEMACCESS2(0x20,2) ",%%xmm4   \n"
-    "paddd     %%xmm0,%%xmm4                   \n"
-    "paddd     %%xmm5,%%xmm0                   \n"
-    "movdqa    " MEMACCESS2(0x30,2) ",%%xmm5   \n"
-    "lea       " MEMLEA(0x40,2) ",%2           \n"
-    "paddd     %%xmm0,%%xmm5                   \n"
-    "movdqa    %%xmm2," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm3," MEMACCESS2(0x10,1) "   \n"
-    "movdqa    %%xmm4," MEMACCESS2(0x20,1) "   \n"
-    "movdqa    %%xmm5," MEMACCESS2(0x30,1) "   \n"
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "sub       $0x4,%3                         \n"
-    "jge       40b                             \n"
-
-  "49:                                         \n"
-    "add       $0x3,%3                         \n"
-    "jl        19f                             \n"
-
-  // 1 pixel loop                              \n"
-    LABELALIGN
-  "10:                                         \n"
-    "movd      " MEMACCESS(0) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    "punpcklbw %%xmm1,%%xmm2                   \n"
-    "punpcklwd %%xmm1,%%xmm2                   \n"
-    "paddd     %%xmm2,%%xmm0                   \n"
-    "movdqu    " MEMACCESS(2) ",%%xmm2         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "paddd     %%xmm0,%%xmm2                   \n"
-    "movdqu    %%xmm2," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x1,%3                         \n"
-    "jge       10b                             \n"
-
-  "19:                                         \n"
-  : "+r"(row),  // %0
-    "+r"(cumsum),  // %1
-    "+r"(previous_cumsum),  // %2
-    "+r"(width)  // %3
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_COMPUTECUMULATIVESUMROW_SSE2
-
-#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
-                                    int width, int area, uint8* dst,
-                                    int count) {
-  asm volatile (
-    "movd      %5,%%xmm5                       \n"
-    "cvtdq2ps  %%xmm5,%%xmm5                   \n"
-    "rcpss     %%xmm5,%%xmm4                   \n"
-    "pshufd    $0x0,%%xmm4,%%xmm4              \n"
-    "sub       $0x4,%3                         \n"
-    "jl        49f                             \n"
-    "cmpl      $0x80,%5                        \n"
-    "ja        40f                             \n"
-
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    "pcmpeqb   %%xmm6,%%xmm6                   \n"
-    "psrld     $0x10,%%xmm6                    \n"
-    "cvtdq2ps  %%xmm6,%%xmm6                   \n"
-    "addps     %%xmm6,%%xmm5                   \n"
-    "mulps     %%xmm4,%%xmm5                   \n"
-    "cvtps2dq  %%xmm5,%%xmm5                   \n"
-    "packssdw  %%xmm5,%%xmm5                   \n"
-
-  // 4 pixel small loop                        \n"
-    LABELALIGN
-  "4:                                         \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    BUNDLEALIGN
-    MEMOPREG(psubd,0x00,0,4,4,xmm0)            // psubd    0x00(%0,%4,4),%%xmm0
-    MEMOPREG(psubd,0x10,0,4,4,xmm1)            // psubd    0x10(%0,%4,4),%%xmm1
-    MEMOPREG(psubd,0x20,0,4,4,xmm2)            // psubd    0x20(%0,%4,4),%%xmm2
-    MEMOPREG(psubd,0x30,0,4,4,xmm3)            // psubd    0x30(%0,%4,4),%%xmm3
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "psubd     " MEMACCESS(1) ",%%xmm0         \n"
-    "psubd     " MEMACCESS2(0x10,1) ",%%xmm1   \n"
-    "psubd     " MEMACCESS2(0x20,1) ",%%xmm2   \n"
-    "psubd     " MEMACCESS2(0x30,1) ",%%xmm3   \n"
-    BUNDLEALIGN
-    MEMOPREG(paddd,0x00,1,4,4,xmm0)            // paddd    0x00(%1,%4,4),%%xmm0
-    MEMOPREG(paddd,0x10,1,4,4,xmm1)            // paddd    0x10(%1,%4,4),%%xmm1
-    MEMOPREG(paddd,0x20,1,4,4,xmm2)            // paddd    0x20(%1,%4,4),%%xmm2
-    MEMOPREG(paddd,0x30,1,4,4,xmm3)            // paddd    0x30(%1,%4,4),%%xmm3
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "packssdw  %%xmm1,%%xmm0                   \n"
-    "packssdw  %%xmm3,%%xmm2                   \n"
-    "pmulhuw   %%xmm5,%%xmm0                   \n"
-    "pmulhuw   %%xmm5,%%xmm2                   \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "sub       $0x4,%3                         \n"
-    "jge       4b                              \n"
-    "jmp       49f                             \n"
-
-  // 4 pixel loop                              \n"
-    LABELALIGN
-  "40:                                         \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "movdqa    " MEMACCESS2(0x20,0) ",%%xmm2   \n"
-    "movdqa    " MEMACCESS2(0x30,0) ",%%xmm3   \n"
-    BUNDLEALIGN
-    MEMOPREG(psubd,0x00,0,4,4,xmm0)            // psubd    0x00(%0,%4,4),%%xmm0
-    MEMOPREG(psubd,0x10,0,4,4,xmm1)            // psubd    0x10(%0,%4,4),%%xmm1
-    MEMOPREG(psubd,0x20,0,4,4,xmm2)            // psubd    0x20(%0,%4,4),%%xmm2
-    MEMOPREG(psubd,0x30,0,4,4,xmm3)            // psubd    0x30(%0,%4,4),%%xmm3
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "psubd     " MEMACCESS(1) ",%%xmm0         \n"
-    "psubd     " MEMACCESS2(0x10,1) ",%%xmm1   \n"
-    "psubd     " MEMACCESS2(0x20,1) ",%%xmm2   \n"
-    "psubd     " MEMACCESS2(0x30,1) ",%%xmm3   \n"
-    BUNDLEALIGN
-    MEMOPREG(paddd,0x00,1,4,4,xmm0)            // paddd    0x00(%1,%4,4),%%xmm0
-    MEMOPREG(paddd,0x10,1,4,4,xmm1)            // paddd    0x10(%1,%4,4),%%xmm1
-    MEMOPREG(paddd,0x20,1,4,4,xmm2)            // paddd    0x20(%1,%4,4),%%xmm2
-    MEMOPREG(paddd,0x30,1,4,4,xmm3)            // paddd    0x30(%1,%4,4),%%xmm3
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "cvtdq2ps  %%xmm0,%%xmm0                   \n"
-    "cvtdq2ps  %%xmm1,%%xmm1                   \n"
-    "mulps     %%xmm4,%%xmm0                   \n"
-    "mulps     %%xmm4,%%xmm1                   \n"
-    "cvtdq2ps  %%xmm2,%%xmm2                   \n"
-    "cvtdq2ps  %%xmm3,%%xmm3                   \n"
-    "mulps     %%xmm4,%%xmm2                   \n"
-    "mulps     %%xmm4,%%xmm3                   \n"
-    "cvtps2dq  %%xmm0,%%xmm0                   \n"
-    "cvtps2dq  %%xmm1,%%xmm1                   \n"
-    "cvtps2dq  %%xmm2,%%xmm2                   \n"
-    "cvtps2dq  %%xmm3,%%xmm3                   \n"
-    "packssdw  %%xmm1,%%xmm0                   \n"
-    "packssdw  %%xmm3,%%xmm2                   \n"
-    "packuswb  %%xmm2,%%xmm0                   \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "sub       $0x4,%3                         \n"
-    "jge       40b                             \n"
-
-  "49:                                         \n"
-    "add       $0x3,%3                         \n"
-    "jl        19f                             \n"
-
-  // 1 pixel loop                              \n"
-    LABELALIGN
-  "10:                                         \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(psubd,0x00,0,4,4,xmm0)            // psubd    0x00(%0,%4,4),%%xmm0
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "psubd     " MEMACCESS(1) ",%%xmm0         \n"
-    BUNDLEALIGN
-    MEMOPREG(paddd,0x00,1,4,4,xmm0)            // paddd    0x00(%1,%4,4),%%xmm0
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "cvtdq2ps  %%xmm0,%%xmm0                   \n"
-    "mulps     %%xmm4,%%xmm0                   \n"
-    "cvtps2dq  %%xmm0,%%xmm0                   \n"
-    "packssdw  %%xmm0,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movd      %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x4,2) ",%2            \n"
-    "sub       $0x1,%3                         \n"
-    "jge       10b                             \n"
-  "19:                                         \n"
-  : "+r"(topleft),  // %0
-    "+r"(botleft),  // %1
-    "+r"(dst),      // %2
-    "+rm"(count)    // %3
-  : "r"((intptr_t)(width)),  // %4
-    "rm"(area)     // %5
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-#endif  // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-
-#ifdef HAS_ARGBAFFINEROW_SSE2
-// Copy ARGB pixels from source image with slope to a row of destination.
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
-                        uint8* dst_argb, const float* src_dudv, int width) {
-  intptr_t src_argb_stride_temp = src_argb_stride;
-  intptr_t temp = 0;
-  asm volatile (
-    "movq      " MEMACCESS(3) ",%%xmm2         \n"
-    "movq      " MEMACCESS2(0x08,3) ",%%xmm7   \n"
-    "shl       $0x10,%1                        \n"
-    "add       $0x4,%1                         \n"
-    "movd      %1,%%xmm5                       \n"
-    "sub       $0x4,%4                         \n"
-    "jl        49f                             \n"
-
-    "pshufd    $0x44,%%xmm7,%%xmm7             \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    "movdqa    %%xmm2,%%xmm0                   \n"
-    "addps     %%xmm7,%%xmm0                   \n"
-    "movlhps   %%xmm0,%%xmm2                   \n"
-    "movdqa    %%xmm7,%%xmm4                   \n"
-    "addps     %%xmm4,%%xmm4                   \n"
-    "movdqa    %%xmm2,%%xmm3                   \n"
-    "addps     %%xmm4,%%xmm3                   \n"
-    "addps     %%xmm4,%%xmm4                   \n"
-
-  // 4 pixel loop                              \n"
-    LABELALIGN
-  "40:                                         \n"
-    "cvttps2dq %%xmm2,%%xmm0                   \n"  // x, y float to int first 2
-    "cvttps2dq %%xmm3,%%xmm1                   \n"  // x, y float to int next 2
-    "packssdw  %%xmm1,%%xmm0                   \n"  // x, y as 8 shorts
-    "pmaddwd   %%xmm5,%%xmm0                   \n"  // off = x * 4 + y * stride
-    "movd      %%xmm0,%k1                      \n"
-    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
-    "movd      %%xmm0,%k5                      \n"
-    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
-    BUNDLEALIGN
-    MEMOPREG(movd,0x00,0,1,1,xmm1)             //  movd      (%0,%1,1),%%xmm1
-    MEMOPREG(movd,0x00,0,5,1,xmm6)             //  movd      (%0,%5,1),%%xmm6
-    "punpckldq %%xmm6,%%xmm1                   \n"
-    "addps     %%xmm4,%%xmm2                   \n"
-    "movq      %%xmm1," MEMACCESS(2) "         \n"
-    "movd      %%xmm0,%k1                      \n"
-    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
-    "movd      %%xmm0,%k5                      \n"
-    BUNDLEALIGN
-    MEMOPREG(movd,0x00,0,1,1,xmm0)             //  movd      (%0,%1,1),%%xmm0
-    MEMOPREG(movd,0x00,0,5,1,xmm6)             //  movd      (%0,%5,1),%%xmm6
-    "punpckldq %%xmm6,%%xmm0                   \n"
-    "addps     %%xmm4,%%xmm3                   \n"
-    "sub       $0x4,%4                         \n"
-    "movq      %%xmm0," MEMACCESS2(0x08,2) "   \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jge       40b                             \n"
-
-  "49:                                         \n"
-    "add       $0x3,%4                         \n"
-    "jl        19f                             \n"
-
-  // 1 pixel loop                              \n"
-    LABELALIGN
-  "10:                                         \n"
-    "cvttps2dq %%xmm2,%%xmm0                   \n"
-    "packssdw  %%xmm0,%%xmm0                   \n"
-    "pmaddwd   %%xmm5,%%xmm0                   \n"
-    "addps     %%xmm7,%%xmm2                   \n"
-    "movd      %%xmm0,%k1                      \n"
-    BUNDLEALIGN
-    MEMOPREG(movd,0x00,0,1,1,xmm0)             //  movd      (%0,%1,1),%%xmm0
-    "sub       $0x1,%4                         \n"
-    "movd      %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x04,2) ",%2           \n"
-    "jge       10b                             \n"
-  "19:                                         \n"
-  : "+r"(src_argb),  // %0
-    "+r"(src_argb_stride_temp),  // %1
-    "+r"(dst_argb),  // %2
-    "+r"(src_dudv),  // %3
-    "+rm"(width),    // %4
-    "+r"(temp)   // %5
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_ARGBAFFINEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                          ptrdiff_t src_stride, int dst_width,
-                          int source_y_fraction) {
-  asm volatile (
-    "sub       %1,%0                           \n"
-    "shr       %3                              \n"
-    "cmp       $0x0,%3                         \n"
-    "je        100f                            \n"
-    "cmp       $0x20,%3                        \n"
-    "je        75f                             \n"
-    "cmp       $0x40,%3                        \n"
-    "je        50f                             \n"
-    "cmp       $0x60,%3                        \n"
-    "je        25f                             \n"
-
-    "movd      %3,%%xmm0                       \n"
-    "neg       %3                              \n"
-    "add       $0x80,%3                        \n"
-    "movd      %3,%%xmm5                       \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "punpcklwd %%xmm5,%%xmm5                   \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-
-    // General purpose row blend.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm2)
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm2,%%xmm0                   \n"
-    "punpckhbw %%xmm2,%%xmm1                   \n"
-    "pmaddubsw %%xmm5,%%xmm0                   \n"
-    "pmaddubsw %%xmm5,%%xmm1                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-    "jmp       99f                             \n"
-
-    // Blend 25 / 75.
-    LABELALIGN
-  "25:                                         \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm1)
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        25b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 50 / 50.
-    LABELALIGN
-  "50:                                         \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm1)
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        50b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 75 / 25.
-    LABELALIGN
-  "75:                                         \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm1         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm0)
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        75b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-    LABELALIGN
-  "100:                                        \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    "sub       $0x10,%2                        \n"
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        100b                            \n"
-
-  "99:                                         \n"
-  : "+r"(dst_ptr),    // %0
-    "+r"(src_ptr),    // %1
-    "+r"(dst_width),  // %2
-    "+r"(source_y_fraction)  // %3
-  : "r"((intptr_t)(src_stride))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                         ptrdiff_t src_stride, int dst_width,
-                         int source_y_fraction) {
-  asm volatile (
-    "sub       %1,%0                           \n"
-    "shr       %3                              \n"
-    "cmp       $0x0,%3                         \n"
-    "je        100f                            \n"
-    "cmp       $0x20,%3                        \n"
-    "je        75f                             \n"
-    "cmp       $0x40,%3                        \n"
-    "je        50f                             \n"
-    "cmp       $0x60,%3                        \n"
-    "je        25f                             \n"
-
-    "movd      %3,%%xmm0                       \n"
-    "neg       %3                              \n"
-    "add       $0x80,%3                        \n"
-    "movd      %3,%%xmm5                       \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "punpcklwd %%xmm5,%%xmm5                   \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-
-    // General purpose row blend.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm2)           //  movdqa    (%1,%4,1),%%xmm2
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm2,%%xmm3                   \n"
-    "punpcklbw %%xmm4,%%xmm2                   \n"
-    "punpckhbw %%xmm4,%%xmm3                   \n"
-    "punpcklbw %%xmm4,%%xmm0                   \n"
-    "punpckhbw %%xmm4,%%xmm1                   \n"
-    "psubw     %%xmm0,%%xmm2                   \n"
-    "psubw     %%xmm1,%%xmm3                   \n"
-    "paddw     %%xmm2,%%xmm2                   \n"
-    "paddw     %%xmm3,%%xmm3                   \n"
-    "pmulhw    %%xmm5,%%xmm2                   \n"
-    "pmulhw    %%xmm5,%%xmm3                   \n"
-    "paddw     %%xmm2,%%xmm0                   \n"
-    "paddw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-    "jmp       99f                             \n"
-
-    // Blend 25 / 75.
-    LABELALIGN
-  "25:                                         \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm1)           //  movdqa    (%1,%4,1),%%xmm1
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        25b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 50 / 50.
-    LABELALIGN
-  "50:                                         \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm1)           //  movdqa    (%1,%4,1),%%xmm1
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        50b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 75 / 25.
-    LABELALIGN
-  "75:                                         \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm1         \n"
-    MEMOPREG(movdqa,0x00,1,4,1,xmm0)           //  movdqa    (%1,%4,1),%%xmm0
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        75b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-    LABELALIGN
-  "100:                                        \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    "sub       $0x10,%2                        \n"
-    MEMOPMEM(movdqa,xmm0,0x00,1,0,1)           //  movdqa    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        100b                            \n"
-
-  "99:                                         \n"
-  : "+r"(dst_ptr),    // %0
-    "+r"(src_ptr),    // %1
-    "+r"(dst_width),  // %2
-    "+r"(source_y_fraction)  // %3
-  : "r"((intptr_t)(src_stride))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_INTERPOLATEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                                    ptrdiff_t src_stride, int dst_width,
-                                    int source_y_fraction) {
-  asm volatile (
-    "sub       %1,%0                           \n"
-    "shr       %3                              \n"
-    "cmp       $0x0,%3                         \n"
-    "je        100f                            \n"
-    "cmp       $0x20,%3                        \n"
-    "je        75f                             \n"
-    "cmp       $0x40,%3                        \n"
-    "je        50f                             \n"
-    "cmp       $0x60,%3                        \n"
-    "je        25f                             \n"
-
-    "movd      %3,%%xmm0                       \n"
-    "neg       %3                              \n"
-    "add       $0x80,%3                        \n"
-    "movd      %3,%%xmm5                       \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "punpcklwd %%xmm5,%%xmm5                   \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-
-    // General purpose row blend.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm2)
-    "movdqu    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm2,%%xmm0                   \n"
-    "punpckhbw %%xmm2,%%xmm1                   \n"
-    "pmaddubsw %%xmm5,%%xmm0                   \n"
-    "pmaddubsw %%xmm5,%%xmm1                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "psrlw     $0x7,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-    "jmp       99f                             \n"
-
-    // Blend 25 / 75.
-    LABELALIGN
-  "25:                                         \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm1)
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        25b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 50 / 50.
-    LABELALIGN
-  "50:                                         \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm1)
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        50b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 75 / 25.
-    LABELALIGN
-  "75:                                         \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm0)
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        75b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-    LABELALIGN
-  "100:                                        \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    "sub       $0x10,%2                        \n"
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        100b                            \n"
-
-  "99:                                         \n"
-  : "+r"(dst_ptr),    // %0
-    "+r"(src_ptr),    // %1
-    "+r"(dst_width),  // %2
-    "+r"(source_y_fraction)  // %3
-  : "r"((intptr_t)(src_stride))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm5"
-#endif
-  );
-}
-#endif   // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                                   ptrdiff_t src_stride, int dst_width,
-                                   int source_y_fraction) {
-  asm volatile (
-    "sub       %1,%0                           \n"
-    "shr       %3                              \n"
-    "cmp       $0x0,%3                         \n"
-    "je        100f                            \n"
-    "cmp       $0x20,%3                        \n"
-    "je        75f                             \n"
-    "cmp       $0x40,%3                        \n"
-    "je        50f                             \n"
-    "cmp       $0x60,%3                        \n"
-    "je        25f                             \n"
-
-    "movd      %3,%%xmm0                       \n"
-    "neg       %3                              \n"
-    "add       $0x80,%3                        \n"
-    "movd      %3,%%xmm5                       \n"
-    "punpcklbw %%xmm0,%%xmm5                   \n"
-    "punpcklwd %%xmm5,%%xmm5                   \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    "pxor      %%xmm4,%%xmm4                   \n"
-
-    // General purpose row blend.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm2)           //  movdqu    (%1,%4,1),%%xmm2
-    "movdqu    %%xmm0,%%xmm1                   \n"
-    "movdqu    %%xmm2,%%xmm3                   \n"
-    "punpcklbw %%xmm4,%%xmm2                   \n"
-    "punpckhbw %%xmm4,%%xmm3                   \n"
-    "punpcklbw %%xmm4,%%xmm0                   \n"
-    "punpckhbw %%xmm4,%%xmm1                   \n"
-    "psubw     %%xmm0,%%xmm2                   \n"
-    "psubw     %%xmm1,%%xmm3                   \n"
-    "paddw     %%xmm2,%%xmm2                   \n"
-    "paddw     %%xmm3,%%xmm3                   \n"
-    "pmulhw    %%xmm5,%%xmm2                   \n"
-    "pmulhw    %%xmm5,%%xmm3                   \n"
-    "paddw     %%xmm2,%%xmm0                   \n"
-    "paddw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-    "jmp       99f                             \n"
-
-    // Blend 25 / 75.
-    LABELALIGN
-  "25:                                         \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm1)           //  movdqu    (%1,%4,1),%%xmm1
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        25b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 50 / 50.
-    LABELALIGN
-  "50:                                         \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm1)           //  movdqu    (%1,%4,1),%%xmm1
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        50b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 75 / 25.
-    LABELALIGN
-  "75:                                         \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm1         \n"
-    MEMOPREG(movdqu,0x00,1,4,1,xmm0)           //  movdqu    (%1,%4,1),%%xmm0
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "pavgb     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
-    BUNDLEALIGN
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        75b                             \n"
-    "jmp       99f                             \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-    LABELALIGN
-  "100:                                        \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
-    "sub       $0x10,%2                        \n"
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)           //  movdqu    %%xmm0,(%1,%0,1)
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        100b                            \n"
-
-  "99:                                         \n"
-  : "+r"(dst_ptr),    // %0
-    "+r"(src_ptr),    // %1
-    "+r"(dst_width),  // %2
-    "+r"(source_y_fraction)  // %3
-  : "r"((intptr_t)(src_stride))  // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_INTERPOLATEROW_SSE2
-
-#ifdef HAS_HALFROW_SSE2
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  asm volatile (
-    "sub       %0,%1                           \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(pavgb,0x00,0,3,1,xmm0)            //  pavgb     (%0,%3),%%xmm0
-    "sub       $0x10,%2                        \n"
-    MEMOPMEM(movdqa,xmm0,0x00,0,1,1)           //  movdqa    %%xmm0,(%0,%1)
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "jg        1b                              \n"
-  : "+r"(src_uv),  // %0
-    "+r"(dst_uv),  // %1
-    "+r"(pix)      // %2
-  : "r"((intptr_t)(src_uv_stride))  // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-      , "xmm0"
-#endif
-  );
-}
-#endif  // HAS_HALFROW_SSE2
-
-#ifdef HAS_ARGBTOBAYERROW_SSSE3
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
-                          uint32 selector, int pix) {
-  asm volatile (
-    // NaCL caveat - assumes movd is from GPR
-    "movd      %3,%%xmm5                       \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pshufb    %%xmm5,%%xmm0                   \n"
-    "pshufb    %%xmm5,%%xmm1                   \n"
-    "punpckldq %%xmm1,%%xmm0                   \n"
-    "sub       $0x8,%2                         \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_bayer), // %1
-    "+r"(pix)        // %2
-  : "g"(selector)    // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBTOBAYERROW_SSSE3
-
-#ifdef HAS_ARGBTOBAYERGGROW_SSE2
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
-                           uint32 selector, int pix) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrld     $0x18,%%xmm5                    \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrld     $0x8,%%xmm0                     \n"
-    "psrld     $0x8,%%xmm1                     \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packssdw  %%xmm1,%%xmm0                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x8,%2                         \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_bayer), // %1
-    "+r"(pix)        // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBTOBAYERGGROW_SSE2
-
-#ifdef HAS_ARGBSHUFFLEROW_SSSE3
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                          const uint8* shuffler, int pix) {
-  asm volatile (
-    "movdqa    " MEMACCESS(3) ",%%xmm5         \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pshufb    %%xmm5,%%xmm0                   \n"
-    "pshufb    %%xmm5,%%xmm1                   \n"
-    "sub       $0x8,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  : "r"(shuffler)    // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                    const uint8* shuffler, int pix) {
-  asm volatile (
-    "movdqa    " MEMACCESS(3) ",%%xmm5         \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pshufb    %%xmm5,%%xmm0                   \n"
-    "pshufb    %%xmm5,%%xmm1                   \n"
-    "sub       $0x8,%2                         \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  : "r"(shuffler)    // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBSHUFFLEROW_SSSE3
-
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix) {
-  asm volatile (
-    "vbroadcastf128 " MEMACCESS(3) ",%%ymm5    \n"
-    LABELALIGN
-  "1:                                          \n"
-    "vmovdqu   " MEMACCESS(0) ",%%ymm0         \n"
-    "vmovdqu   " MEMACCESS2(0x20,0) ",%%ymm1   \n"
-    "lea       " MEMLEA(0x40,0) ",%0           \n"
-    "vpshufb   %%ymm5,%%ymm0,%%ymm0            \n"
-    "vpshufb   %%ymm5,%%ymm1,%%ymm1            \n"
-    "sub       $0x10,%2                        \n"
-    "vmovdqu   %%ymm0," MEMACCESS(1) "         \n"
-    "vmovdqu   %%ymm1," MEMACCESS2(0x20,1) "   \n"
-    "lea       " MEMLEA(0x40,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(pix)        // %2
-  : "r"(shuffler)    // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBSHUFFLEROW_AVX2
-
-#ifdef HAS_ARGBSHUFFLEROW_SSE2
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix) {
-  uintptr_t pixel_temp = 0u;
-  asm volatile (
-    "pxor      %%xmm5,%%xmm5                   \n"
-    "mov       " MEMACCESS(4) ",%k2            \n"
-    "cmp       $0x3000102,%k2                  \n"
-    "je        3012f                           \n"
-    "cmp       $0x10203,%k2                    \n"
-    "je        123f                            \n"
-    "cmp       $0x30201,%k2                    \n"
-    "je        321f                            \n"
-    "cmp       $0x2010003,%k2                  \n"
-    "je        2103f                           \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movzb     " MEMACCESS(4) ",%2             \n"
-    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
-    "mov       %b2," MEMACCESS(1) "            \n"
-    "movzb     " MEMACCESS2(0x1,4) ",%2        \n"
-    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
-    "mov       %b2," MEMACCESS2(0x1,1) "       \n"
-    BUNDLEALIGN
-    "movzb     " MEMACCESS2(0x2,4) ",%2        \n"
-    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
-    "mov       %b2," MEMACCESS2(0x2,1) "       \n"
-    "movzb     " MEMACCESS2(0x3,4) ",%2        \n"
-    MEMOPARG(movzb,0x00,0,2,1,2) "             \n"  //  movzb     (%0,%2,1),%2
-    "mov       %b2," MEMACCESS2(0x3,1) "       \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    "lea       " MEMLEA(0x4,1) ",%1            \n"
-    "sub       $0x1,%3                         \n"
-    "jg        1b                              \n"
-    "jmp       99f                             \n"
-
-    LABELALIGN
-  "123:                                        \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "punpckhbw %%xmm5,%%xmm1                   \n"
-    "pshufhw   $0x1b,%%xmm0,%%xmm0             \n"
-    "pshuflw   $0x1b,%%xmm0,%%xmm0             \n"
-    "pshufhw   $0x1b,%%xmm1,%%xmm1             \n"
-    "pshuflw   $0x1b,%%xmm1,%%xmm1             \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        123b                            \n"
-    "jmp       99f                             \n"
-
-    LABELALIGN
-  "321:                                        \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "punpckhbw %%xmm5,%%xmm1                   \n"
-    "pshufhw   $0x39,%%xmm0,%%xmm0             \n"
-    "pshuflw   $0x39,%%xmm0,%%xmm0             \n"
-    "pshufhw   $0x39,%%xmm1,%%xmm1             \n"
-    "pshuflw   $0x39,%%xmm1,%%xmm1             \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        321b                            \n"
-    "jmp       99f                             \n"
-
-    LABELALIGN
-  "2103:                                       \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "punpckhbw %%xmm5,%%xmm1                   \n"
-    "pshufhw   $0x93,%%xmm0,%%xmm0             \n"
-    "pshuflw   $0x93,%%xmm0,%%xmm0             \n"
-    "pshufhw   $0x93,%%xmm1,%%xmm1             \n"
-    "pshuflw   $0x93,%%xmm1,%%xmm1             \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        2103b                           \n"
-    "jmp       99f                             \n"
-
-    LABELALIGN
-  "3012:                                       \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "punpckhbw %%xmm5,%%xmm1                   \n"
-    "pshufhw   $0xc6,%%xmm0,%%xmm0             \n"
-    "pshuflw   $0xc6,%%xmm0,%%xmm0             \n"
-    "pshufhw   $0xc6,%%xmm1,%%xmm1             \n"
-    "pshuflw   $0xc6,%%xmm1,%%xmm1             \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        3012b                           \n"
-
-  "99:                                         \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+d"(pixel_temp),  // %2
-    "+r"(pix)         // %3
-  : "r"(shuffler)      // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBSHUFFLEROW_SSE2
-
-#ifdef HAS_I422TOYUY2ROW_SSE2
-void I422ToYUY2Row_SSE2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_frame, int width) {
- asm volatile (
-    "sub       %1,%2                             \n"
-    LABELALIGN
-  "1:                                            \n"
-    "movq      " MEMACCESS(1) ",%%xmm2           \n"
-    MEMOPREG(movq,0x00,1,2,1,xmm3)               //  movq    (%1,%2,1),%%xmm3
-    "lea       " MEMLEA(0x8,1) ",%1              \n"
-    "punpcklbw %%xmm3,%%xmm2                     \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0           \n"
-    "lea       " MEMLEA(0x10,0) ",%0             \n"
-    "movdqa    %%xmm0,%%xmm1                     \n"
-    "punpcklbw %%xmm2,%%xmm0                     \n"
-    "punpckhbw %%xmm2,%%xmm1                     \n"
-    "movdqu    %%xmm0," MEMACCESS(3) "           \n"
-    "movdqu    %%xmm1," MEMACCESS2(0x10,3) "     \n"
-    "lea       " MEMLEA(0x20,3) ",%3             \n"
-    "sub       $0x10,%4                          \n"
-    "jg         1b                               \n"
-    : "+r"(src_y),  // %0
-      "+r"(src_u),  // %1
-      "+r"(src_v),  // %2
-      "+r"(dst_frame),  // %3
-      "+rm"(width)  // %4
-    :
-    : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
-  );
-}
-#endif  // HAS_I422TOYUY2ROW_SSE2
-
-#ifdef HAS_I422TOUYVYROW_SSE2
-void I422ToUYVYRow_SSE2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_frame, int width) {
- asm volatile (
-    "sub        %1,%2                            \n"
-    LABELALIGN
-  "1:                                            \n"
-    "movq      " MEMACCESS(1) ",%%xmm2           \n"
-    MEMOPREG(movq,0x00,1,2,1,xmm3)               //  movq    (%1,%2,1),%%xmm3
-    "lea       " MEMLEA(0x8,1) ",%1              \n"
-    "punpcklbw %%xmm3,%%xmm2                     \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0           \n"
-    "movdqa    %%xmm2,%%xmm1                     \n"
-    "lea       " MEMLEA(0x10,0) ",%0             \n"
-    "punpcklbw %%xmm0,%%xmm1                     \n"
-    "punpckhbw %%xmm0,%%xmm2                     \n"
-    "movdqu    %%xmm1," MEMACCESS(3) "           \n"
-    "movdqu    %%xmm2," MEMACCESS2(0x10,3) "     \n"
-    "lea       " MEMLEA(0x20,3) ",%3             \n"
-    "sub       $0x10,%4                          \n"
-    "jg         1b                               \n"
-    : "+r"(src_y),  // %0
-      "+r"(src_u),  // %1
-      "+r"(src_v),  // %2
-      "+r"(dst_frame),  // %3
-      "+rm"(width)  // %4
-    :
-    : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
-  );
-}
-#endif  // HAS_I422TOUYVYROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
-                            uint8* dst_argb, const float* poly,
-                            int width) {
-  asm volatile (
-    "pxor      %%xmm3,%%xmm3                   \n"
-
-    // 2 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "punpcklbw %%xmm3,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm4                   \n"
-    "punpcklwd %%xmm3,%%xmm0                   \n"
-    "punpckhwd %%xmm3,%%xmm4                   \n"
-    "cvtdq2ps  %%xmm0,%%xmm0                   \n"
-    "cvtdq2ps  %%xmm4,%%xmm4                   \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "movdqa    %%xmm4,%%xmm5                   \n"
-    "mulps     " MEMACCESS2(0x10,3) ",%%xmm0   \n"
-    "mulps     " MEMACCESS2(0x10,3) ",%%xmm4   \n"
-    "addps     " MEMACCESS(3) ",%%xmm0         \n"
-    "addps     " MEMACCESS(3) ",%%xmm4         \n"
-    "movdqa    %%xmm1,%%xmm2                   \n"
-    "movdqa    %%xmm5,%%xmm6                   \n"
-    "mulps     %%xmm1,%%xmm2                   \n"
-    "mulps     %%xmm5,%%xmm6                   \n"
-    "mulps     %%xmm2,%%xmm1                   \n"
-    "mulps     %%xmm6,%%xmm5                   \n"
-    "mulps     " MEMACCESS2(0x20,3) ",%%xmm2   \n"
-    "mulps     " MEMACCESS2(0x20,3) ",%%xmm6   \n"
-    "mulps     " MEMACCESS2(0x30,3) ",%%xmm1   \n"
-    "mulps     " MEMACCESS2(0x30,3) ",%%xmm5   \n"
-    "addps     %%xmm2,%%xmm0                   \n"
-    "addps     %%xmm6,%%xmm4                   \n"
-    "addps     %%xmm1,%%xmm0                   \n"
-    "addps     %%xmm5,%%xmm4                   \n"
-    "cvttps2dq %%xmm0,%%xmm0                   \n"
-    "cvttps2dq %%xmm4,%%xmm4                   \n"
-    "packuswb  %%xmm4,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "sub       $0x2,%2                         \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(width)      // %2
-  : "r"(poly)        // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-#endif  // HAS_ARGBPOLYNOMIALROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
-                            uint8* dst_argb, const float* poly,
-                            int width) {
-  asm volatile (
-    "vbroadcastf128 " MEMACCESS(3) ",%%ymm4     \n"
-    "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n"
-    "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n"
-    "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n"
-
-    // 2 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "vpmovzxbd   " MEMACCESS(0) ",%%ymm0       \n"  // 2 ARGB pixels
-    "lea         " MEMLEA(0x8,0) ",%0          \n"
-    "vcvtdq2ps   %%ymm0,%%ymm0                 \n"  // X 8 floats
-    "vmulps      %%ymm0,%%ymm0,%%ymm2          \n"  // X * X
-    "vmulps      %%ymm7,%%ymm0,%%ymm3          \n"  // C3 * X
-    "vfmadd132ps %%ymm5,%%ymm4,%%ymm0          \n"  // result = C0 + C1 * X
-    "vfmadd231ps %%ymm6,%%ymm2,%%ymm0          \n"  // result += C2 * X * X
-    "vfmadd231ps %%ymm3,%%ymm2,%%ymm0          \n"  // result += C3 * X * X * X
-    "vcvttps2dq  %%ymm0,%%ymm0                 \n"
-    "vpackusdw   %%ymm0,%%ymm0,%%ymm0          \n"
-    "vpermq      $0xd8,%%ymm0,%%ymm0           \n"
-    "vpackuswb   %%xmm0,%%xmm0,%%xmm0          \n"
-    "sub         $0x2,%2                       \n"
-    "vmovq       %%xmm0," MEMACCESS(1) "       \n"
-    "lea         " MEMLEA(0x8,1) ",%1          \n"
-    "jg          1b                            \n"
-    "vzeroupper                                \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(width)      // %2
-  : "r"(poly)        // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-// TODO(fbarchard): declare ymm usage when applicable.
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-#endif  // HAS_ARGBPOLYNOMIALROW_AVX2
-
-#ifdef HAS_ARGBCOLORTABLEROW_X86
-// Tranform ARGB pixels with color table.
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
-                           int width) {
-  uintptr_t pixel_temp = 0u;
-  asm volatile (
-    // 1 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movzb     " MEMACCESS(0) ",%1             \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    MEMOPARG(movzb,0x00,3,1,4,1) "             \n"  // movzb (%3,%1,4),%1
-    "mov       %b1," MEMACCESS2(-0x4,0) "      \n"
-    "movzb     " MEMACCESS2(-0x3,0) ",%1       \n"
-    MEMOPARG(movzb,0x01,3,1,4,1) "             \n"  // movzb 0x1(%3,%1,4),%1
-    "mov       %b1," MEMACCESS2(-0x3,0) "      \n"
-    "movzb     " MEMACCESS2(-0x2,0) ",%1       \n"
-    MEMOPARG(movzb,0x02,3,1,4,1) "             \n"  // movzb 0x2(%3,%1,4),%1
-    "mov       %b1," MEMACCESS2(-0x2,0) "      \n"
-    "movzb     " MEMACCESS2(-0x1,0) ",%1       \n"
-    MEMOPARG(movzb,0x03,3,1,4,1) "             \n"  // movzb 0x3(%3,%1,4),%1
-    "mov       %b1," MEMACCESS2(-0x1,0) "      \n"
-    "dec       %2                              \n"
-    "jg        1b                              \n"
-  : "+r"(dst_argb),   // %0
-    "+d"(pixel_temp), // %1
-    "+r"(width)       // %2
-  : "r"(table_argb)   // %3
-  : "memory", "cc");
-}
-#endif  // HAS_ARGBCOLORTABLEROW_X86
-
-#ifdef HAS_RGBCOLORTABLEROW_X86
-// Tranform RGB pixels with color table.
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
-  uintptr_t pixel_temp = 0u;
-  asm volatile (
-    // 1 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movzb     " MEMACCESS(0) ",%1             \n"
-    "lea       " MEMLEA(0x4,0) ",%0            \n"
-    MEMOPARG(movzb,0x00,3,1,4,1) "             \n"  // movzb (%3,%1,4),%1
-    "mov       %b1," MEMACCESS2(-0x4,0) "      \n"
-    "movzb     " MEMACCESS2(-0x3,0) ",%1       \n"
-    MEMOPARG(movzb,0x01,3,1,4,1) "             \n"  // movzb 0x1(%3,%1,4),%1
-    "mov       %b1," MEMACCESS2(-0x3,0) "      \n"
-    "movzb     " MEMACCESS2(-0x2,0) ",%1       \n"
-    MEMOPARG(movzb,0x02,3,1,4,1) "             \n"  // movzb 0x2(%3,%1,4),%1
-    "mov       %b1," MEMACCESS2(-0x2,0) "      \n"
-    "dec       %2                              \n"
-    "jg        1b                              \n"
-  : "+r"(dst_argb),   // %0
-    "+d"(pixel_temp), // %1
-    "+r"(width)       // %2
-  : "r"(table_argb)   // %3
-  : "memory", "cc");
-}
-#endif  // HAS_RGBCOLORTABLEROW_X86
-
-#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
-// Tranform RGB pixels with luma table.
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                 int width,
-                                 const uint8* luma, uint32 lumacoeff) {
-  uintptr_t pixel_temp = 0u;
-  uintptr_t table_temp = 0u;
-  asm volatile (
-    "movd      %6,%%xmm3                       \n"
-    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
-    "pcmpeqb   %%xmm4,%%xmm4                   \n"
-    "psllw     $0x8,%%xmm4                     \n"
-    "pxor      %%xmm5,%%xmm5                   \n"
-
-    // 4 pixel loop.
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(2) ",%%xmm0         \n"
-    "pmaddubsw %%xmm3,%%xmm0                   \n"
-    "phaddw    %%xmm0,%%xmm0                   \n"
-    "pand      %%xmm4,%%xmm0                   \n"
-    "punpcklwd %%xmm5,%%xmm0                   \n"
-    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
-    "add       %5,%1                           \n"
-    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
-
-    "movzb     " MEMACCESS(2) ",%0             \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS(3) "            \n"
-    "movzb     " MEMACCESS2(0x1,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0x1,3) "       \n"
-    "movzb     " MEMACCESS2(0x2,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0x2,3) "       \n"
-    "movzb     " MEMACCESS2(0x3,2) ",%0        \n"
-    "mov       %b0," MEMACCESS2(0x3,3) "       \n"
-
-    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
-    "add       %5,%1                           \n"
-    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
-
-    "movzb     " MEMACCESS2(0x4,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0x4,3) "       \n"
-    BUNDLEALIGN
-    "movzb     " MEMACCESS2(0x5,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0x5,3) "       \n"
-    "movzb     " MEMACCESS2(0x6,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0x6,3) "       \n"
-    "movzb     " MEMACCESS2(0x7,2) ",%0        \n"
-    "mov       %b0," MEMACCESS2(0x7,3) "       \n"
-
-    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
-    "add       %5,%1                           \n"
-    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
-
-    "movzb     " MEMACCESS2(0x8,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0x8,3) "       \n"
-    "movzb     " MEMACCESS2(0x9,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0x9,3) "       \n"
-    "movzb     " MEMACCESS2(0xa,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0xa,3) "       \n"
-    "movzb     " MEMACCESS2(0xb,2) ",%0        \n"
-    "mov       %b0," MEMACCESS2(0xb,3) "       \n"
-
-    "movd      %%xmm0,%k1                      \n"  // 32 bit offset
-    "add       %5,%1                           \n"
-
-    "movzb     " MEMACCESS2(0xc,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0xc,3) "       \n"
-    "movzb     " MEMACCESS2(0xd,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0xd,3) "       \n"
-    "movzb     " MEMACCESS2(0xe,2) ",%0        \n"
-    MEMOPARG(movzb,0x00,1,0,1,0) "             \n"  // movzb     (%1,%0,1),%0
-    "mov       %b0," MEMACCESS2(0xe,3) "       \n"
-    "movzb     " MEMACCESS2(0xf,2) ",%0        \n"
-    "mov       %b0," MEMACCESS2(0xf,3) "       \n"
-    "sub       $0x4,%4                         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "lea       " MEMLEA(0x10,3) ",%3           \n"
-    "jg        1b                              \n"
-  : "+d"(pixel_temp),  // %0
-    "+a"(table_temp),  // %1
-    "+r"(src_argb),    // %2
-    "+r"(dst_argb),    // %3
-    "+rm"(width)       // %4
-  : "r"(luma),         // %5
-    "rm"(lumacoeff)    // %6
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-#endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3
-
-#endif  // defined(__x86_64__) || defined(__i386__)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc
deleted file mode 100755
index f13e4d7ae5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc
+++ /dev/null
@@ -1,7284 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef HAS_ARGBTOYROW_SSSE3
-
-// Constants for ARGB.
-static const vec8 kARGBToY = {
-  13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
-};
-
-// JPeg full range.
-static const vec8 kARGBToYJ = {
-  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
-};
-
-static const vec8 kARGBToU = {
-  112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
-};
-
-static const vec8 kARGBToUJ = {
-  127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
-};
-
-static const vec8 kARGBToV = {
-  -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
-};
-
-static const vec8 kARGBToVJ = {
-  -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
-};
-
-// vpermd for vphaddw + vpackuswb vpermd.
-static const lvec32 kPermdARGBToY_AVX = {
-  0, 4, 1, 5, 2, 6, 3, 7
-};
-
-// vpshufb for vphaddw + vpackuswb packed to shorts.
-static const lvec8 kShufARGBToUV_AVX = {
-  0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
-  0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
-};
-
-// Constants for BGRA.
-static const vec8 kBGRAToY = {
-  0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
-};
-
-static const vec8 kBGRAToU = {
-  0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112
-};
-
-static const vec8 kBGRAToV = {
-  0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
-};
-
-// Constants for ABGR.
-static const vec8 kABGRToY = {
-  33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
-};
-
-static const vec8 kABGRToU = {
-  -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0
-};
-
-static const vec8 kABGRToV = {
-  112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0
-};
-
-// Constants for RGBA.
-static const vec8 kRGBAToY = {
-  0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33
-};
-
-static const vec8 kRGBAToU = {
-  0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38
-};
-
-static const vec8 kRGBAToV = {
-  0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112
-};
-
-static const uvec8 kAddY16 = {
-  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
-};
-
-static const vec16 kAddYJ64 = {
-  64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static const uvec8 kAddUV128 = {
-  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
-  128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
-};
-
-static const uvec16 kAddUVJ128 = {
-  0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
-};
-
-// Shuffle table for converting RGB24 to ARGB.
-static const uvec8 kShuffleMaskRGB24ToARGB = {
-  0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
-};
-
-// Shuffle table for converting RAW to ARGB.
-static const uvec8 kShuffleMaskRAWToARGB = {
-  2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
-};
-
-// Shuffle table for converting ARGB to RGB24.
-static const uvec8 kShuffleMaskARGBToRGB24 = {
-  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static const uvec8 kShuffleMaskARGBToRAW = {
-  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
-};
-
-// Shuffle table for converting ARGBToRGB24 for I422ToRGB24.  First 8 + next 4
-static const uvec8 kShuffleMaskARGBToRGB24_0 = {
-  0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u
-};
-
-// Shuffle table for converting ARGB to RAW.
-static const uvec8 kShuffleMaskARGBToRAW_0 = {
-  2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u
-};
-
-// Duplicates gray value 3 times and fills in alpha opaque.
-__declspec(naked) __declspec(align(16))
-void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
-  __asm {
-    mov        eax, [esp + 4]        // src_y
-    mov        edx, [esp + 8]        // dst_argb
-    mov        ecx, [esp + 12]       // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0xff000000
-    pslld      xmm5, 24
-
-    align      4
-  convertloop:
-    movq       xmm0, qword ptr [eax]
-    lea        eax,  [eax + 8]
-    punpcklbw  xmm0, xmm0
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm0
-    punpckhwd  xmm1, xmm1
-    por        xmm0, xmm5
-    por        xmm1, xmm5
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb,
-                                  int pix) {
-  __asm {
-    mov        eax, [esp + 4]        // src_y
-    mov        edx, [esp + 8]        // dst_argb
-    mov        ecx, [esp + 12]       // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0xff000000
-    pslld      xmm5, 24
-
-    align      4
-  convertloop:
-    movq       xmm0, qword ptr [eax]
-    lea        eax,  [eax + 8]
-    punpcklbw  xmm0, xmm0
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm0
-    punpckhwd  xmm1, xmm1
-    por        xmm0, xmm5
-    por        xmm1, xmm5
-    movdqu     [edx], xmm0
-    movdqu     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) {
-  __asm {
-    mov       eax, [esp + 4]   // src_rgb24
-    mov       edx, [esp + 8]   // dst_argb
-    mov       ecx, [esp + 12]  // pix
-    pcmpeqb   xmm5, xmm5       // generate mask 0xff000000
-    pslld     xmm5, 24
-    movdqa    xmm4, kShuffleMaskRGB24ToARGB
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax]
-    movdqu    xmm1, [eax + 16]
-    movdqu    xmm3, [eax + 32]
-    lea       eax, [eax + 48]
-    movdqa    xmm2, xmm3
-    palignr   xmm2, xmm1, 8    // xmm2 = { xmm3[0:3] xmm1[8:15]}
-    pshufb    xmm2, xmm4
-    por       xmm2, xmm5
-    palignr   xmm1, xmm0, 12   // xmm1 = { xmm3[0:7] xmm0[12:15]}
-    pshufb    xmm0, xmm4
-    movdqa    [edx + 32], xmm2
-    por       xmm0, xmm5
-    pshufb    xmm1, xmm4
-    movdqa    [edx], xmm0
-    por       xmm1, xmm5
-    palignr   xmm3, xmm3, 4    // xmm3 = { xmm3[4:15]}
-    pshufb    xmm3, xmm4
-    movdqa    [edx + 16], xmm1
-    por       xmm3, xmm5
-    sub       ecx, 16
-    movdqa    [edx + 48], xmm3
-    lea       edx, [edx + 64]
-    jg        convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
-                        int pix) {
-  __asm {
-    mov       eax, [esp + 4]   // src_raw
-    mov       edx, [esp + 8]   // dst_argb
-    mov       ecx, [esp + 12]  // pix
-    pcmpeqb   xmm5, xmm5       // generate mask 0xff000000
-    pslld     xmm5, 24
-    movdqa    xmm4, kShuffleMaskRAWToARGB
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax]
-    movdqu    xmm1, [eax + 16]
-    movdqu    xmm3, [eax + 32]
-    lea       eax, [eax + 48]
-    movdqa    xmm2, xmm3
-    palignr   xmm2, xmm1, 8    // xmm2 = { xmm3[0:3] xmm1[8:15]}
-    pshufb    xmm2, xmm4
-    por       xmm2, xmm5
-    palignr   xmm1, xmm0, 12   // xmm1 = { xmm3[0:7] xmm0[12:15]}
-    pshufb    xmm0, xmm4
-    movdqa    [edx + 32], xmm2
-    por       xmm0, xmm5
-    pshufb    xmm1, xmm4
-    movdqa    [edx], xmm0
-    por       xmm1, xmm5
-    palignr   xmm3, xmm3, 4    // xmm3 = { xmm3[4:15]}
-    pshufb    xmm3, xmm4
-    movdqa    [edx + 16], xmm1
-    por       xmm3, xmm5
-    sub       ecx, 16
-    movdqa    [edx + 48], xmm3
-    lea       edx, [edx + 64]
-    jg        convertloop
-    ret
-  }
-}
-
-// pmul method to replicate bits.
-// Math to replicate bits:
-// (v << 8) | (v << 3)
-// v * 256 + v * 8
-// v * (256 + 8)
-// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
-// 20 instructions.
-__declspec(naked) __declspec(align(16))
-void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
-                          int pix) {
-  __asm {
-    mov       eax, 0x01080108  // generate multiplier to repeat 5 bits
-    movd      xmm5, eax
-    pshufd    xmm5, xmm5, 0
-    mov       eax, 0x20802080  // multiplier shift by 5 and then repeat 6 bits
-    movd      xmm6, eax
-    pshufd    xmm6, xmm6, 0
-    pcmpeqb   xmm3, xmm3       // generate mask 0xf800f800 for Red
-    psllw     xmm3, 11
-    pcmpeqb   xmm4, xmm4       // generate mask 0x07e007e0 for Green
-    psllw     xmm4, 10
-    psrlw     xmm4, 5
-    pcmpeqb   xmm7, xmm7       // generate mask 0xff00ff00 for Alpha
-    psllw     xmm7, 8
-
-    mov       eax, [esp + 4]   // src_rgb565
-    mov       edx, [esp + 8]   // dst_argb
-    mov       ecx, [esp + 12]  // pix
-    sub       edx, eax
-    sub       edx, eax
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax]   // fetch 8 pixels of bgr565
-    movdqa    xmm1, xmm0
-    movdqa    xmm2, xmm0
-    pand      xmm1, xmm3    // R in upper 5 bits
-    psllw     xmm2, 11      // B in upper 5 bits
-    pmulhuw   xmm1, xmm5    // * (256 + 8)
-    pmulhuw   xmm2, xmm5    // * (256 + 8)
-    psllw     xmm1, 8
-    por       xmm1, xmm2    // RB
-    pand      xmm0, xmm4    // G in middle 6 bits
-    pmulhuw   xmm0, xmm6    // << 5 * (256 + 4)
-    por       xmm0, xmm7    // AG
-    movdqa    xmm2, xmm1
-    punpcklbw xmm1, xmm0
-    punpckhbw xmm2, xmm0
-    movdqa    [eax * 2 + edx], xmm1  // store 4 pixels of ARGB
-    movdqa    [eax * 2 + edx + 16], xmm2  // store next 4 pixels of ARGB
-    lea       eax, [eax + 16]
-    sub       ecx, 8
-    jg        convertloop
-    ret
-  }
-}
-
-// 24 instructions
-__declspec(naked) __declspec(align(16))
-void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
-                            int pix) {
-  __asm {
-    mov       eax, 0x01080108  // generate multiplier to repeat 5 bits
-    movd      xmm5, eax
-    pshufd    xmm5, xmm5, 0
-    mov       eax, 0x42004200  // multiplier shift by 6 and then repeat 5 bits
-    movd      xmm6, eax
-    pshufd    xmm6, xmm6, 0
-    pcmpeqb   xmm3, xmm3       // generate mask 0xf800f800 for Red
-    psllw     xmm3, 11
-    movdqa    xmm4, xmm3       // generate mask 0x03e003e0 for Green
-    psrlw     xmm4, 6
-    pcmpeqb   xmm7, xmm7       // generate mask 0xff00ff00 for Alpha
-    psllw     xmm7, 8
-
-    mov       eax, [esp + 4]   // src_argb1555
-    mov       edx, [esp + 8]   // dst_argb
-    mov       ecx, [esp + 12]  // pix
-    sub       edx, eax
-    sub       edx, eax
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax]   // fetch 8 pixels of 1555
-    movdqa    xmm1, xmm0
-    movdqa    xmm2, xmm0
-    psllw     xmm1, 1       // R in upper 5 bits
-    psllw     xmm2, 11      // B in upper 5 bits
-    pand      xmm1, xmm3
-    pmulhuw   xmm2, xmm5    // * (256 + 8)
-    pmulhuw   xmm1, xmm5    // * (256 + 8)
-    psllw     xmm1, 8
-    por       xmm1, xmm2    // RB
-    movdqa    xmm2, xmm0
-    pand      xmm0, xmm4    // G in middle 5 bits
-    psraw     xmm2, 8       // A
-    pmulhuw   xmm0, xmm6    // << 6 * (256 + 8)
-    pand      xmm2, xmm7
-    por       xmm0, xmm2    // AG
-    movdqa    xmm2, xmm1
-    punpcklbw xmm1, xmm0
-    punpckhbw xmm2, xmm0
-    movdqa    [eax * 2 + edx], xmm1  // store 4 pixels of ARGB
-    movdqa    [eax * 2 + edx + 16], xmm2  // store next 4 pixels of ARGB
-    lea       eax, [eax + 16]
-    sub       ecx, 8
-    jg        convertloop
-    ret
-  }
-}
-
-// 18 instructions.
-__declspec(naked) __declspec(align(16))
-void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
-                            int pix) {
-  __asm {
-    mov       eax, 0x0f0f0f0f  // generate mask 0x0f0f0f0f
-    movd      xmm4, eax
-    pshufd    xmm4, xmm4, 0
-    movdqa    xmm5, xmm4       // 0xf0f0f0f0 for high nibbles
-    pslld     xmm5, 4
-    mov       eax, [esp + 4]   // src_argb4444
-    mov       edx, [esp + 8]   // dst_argb
-    mov       ecx, [esp + 12]  // pix
-    sub       edx, eax
-    sub       edx, eax
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax]   // fetch 8 pixels of bgra4444
-    movdqa    xmm2, xmm0
-    pand      xmm0, xmm4    // mask low nibbles
-    pand      xmm2, xmm5    // mask high nibbles
-    movdqa    xmm1, xmm0
-    movdqa    xmm3, xmm2
-    psllw     xmm1, 4
-    psrlw     xmm3, 4
-    por       xmm0, xmm1
-    por       xmm2, xmm3
-    movdqa    xmm1, xmm0
-    punpcklbw xmm0, xmm2
-    punpckhbw xmm1, xmm2
-    movdqa    [eax * 2 + edx], xmm0  // store 4 pixels of ARGB
-    movdqa    [eax * 2 + edx + 16], xmm1  // store next 4 pixels of ARGB
-    lea       eax, [eax + 16]
-    sub       ecx, 8
-    jg        convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
-  __asm {
-    mov       eax, [esp + 4]   // src_argb
-    mov       edx, [esp + 8]   // dst_rgb
-    mov       ecx, [esp + 12]  // pix
-    movdqa    xmm6, kShuffleMaskARGBToRGB24
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax]   // fetch 16 pixels of argb
-    movdqu    xmm1, [eax + 16]
-    movdqu    xmm2, [eax + 32]
-    movdqu    xmm3, [eax + 48]
-    lea       eax, [eax + 64]
-    pshufb    xmm0, xmm6    // pack 16 bytes of ARGB to 12 bytes of RGB
-    pshufb    xmm1, xmm6
-    pshufb    xmm2, xmm6
-    pshufb    xmm3, xmm6
-    movdqa    xmm4, xmm1   // 4 bytes from 1 for 0
-    psrldq    xmm1, 4      // 8 bytes from 1
-    pslldq    xmm4, 12     // 4 bytes from 1 for 0
-    movdqa    xmm5, xmm2   // 8 bytes from 2 for 1
-    por       xmm0, xmm4   // 4 bytes from 1 for 0
-    pslldq    xmm5, 8      // 8 bytes from 2 for 1
-    movdqu    [edx], xmm0  // store 0
-    por       xmm1, xmm5   // 8 bytes from 2 for 1
-    psrldq    xmm2, 8      // 4 bytes from 2
-    pslldq    xmm3, 4      // 12 bytes from 3 for 2
-    por       xmm2, xmm3   // 12 bytes from 3 for 2
-    movdqu    [edx + 16], xmm1   // store 1
-    movdqu    [edx + 32], xmm2   // store 2
-    lea       edx, [edx + 48]
-    sub       ecx, 16
-    jg        convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
-  __asm {
-    mov       eax, [esp + 4]   // src_argb
-    mov       edx, [esp + 8]   // dst_rgb
-    mov       ecx, [esp + 12]  // pix
-    movdqa    xmm6, kShuffleMaskARGBToRAW
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax]   // fetch 16 pixels of argb
-    movdqu    xmm1, [eax + 16]
-    movdqu    xmm2, [eax + 32]
-    movdqu    xmm3, [eax + 48]
-    lea       eax, [eax + 64]
-    pshufb    xmm0, xmm6    // pack 16 bytes of ARGB to 12 bytes of RGB
-    pshufb    xmm1, xmm6
-    pshufb    xmm2, xmm6
-    pshufb    xmm3, xmm6
-    movdqa    xmm4, xmm1   // 4 bytes from 1 for 0
-    psrldq    xmm1, 4      // 8 bytes from 1
-    pslldq    xmm4, 12     // 4 bytes from 1 for 0
-    movdqa    xmm5, xmm2   // 8 bytes from 2 for 1
-    por       xmm0, xmm4   // 4 bytes from 1 for 0
-    pslldq    xmm5, 8      // 8 bytes from 2 for 1
-    movdqu    [edx], xmm0  // store 0
-    por       xmm1, xmm5   // 8 bytes from 2 for 1
-    psrldq    xmm2, 8      // 4 bytes from 2
-    pslldq    xmm3, 4      // 12 bytes from 3 for 2
-    por       xmm2, xmm3   // 12 bytes from 3 for 2
-    movdqu    [edx + 16], xmm1   // store 1
-    movdqu    [edx + 32], xmm2   // store 2
-    lea       edx, [edx + 48]
-    sub       ecx, 16
-    jg        convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
-  __asm {
-    mov       eax, [esp + 4]   // src_argb
-    mov       edx, [esp + 8]   // dst_rgb
-    mov       ecx, [esp + 12]  // pix
-    pcmpeqb   xmm3, xmm3       // generate mask 0x0000001f
-    psrld     xmm3, 27
-    pcmpeqb   xmm4, xmm4       // generate mask 0x000007e0
-    psrld     xmm4, 26
-    pslld     xmm4, 5
-    pcmpeqb   xmm5, xmm5       // generate mask 0xfffff800
-    pslld     xmm5, 11
-
-    align      4
- convertloop:
-    movdqa    xmm0, [eax]   // fetch 4 pixels of argb
-    movdqa    xmm1, xmm0    // B
-    movdqa    xmm2, xmm0    // G
-    pslld     xmm0, 8       // R
-    psrld     xmm1, 3       // B
-    psrld     xmm2, 5       // G
-    psrad     xmm0, 16      // R
-    pand      xmm1, xmm3    // B
-    pand      xmm2, xmm4    // G
-    pand      xmm0, xmm5    // R
-    por       xmm1, xmm2    // BG
-    por       xmm0, xmm1    // BGR
-    packssdw  xmm0, xmm0
-    lea       eax, [eax + 16]
-    movq      qword ptr [edx], xmm0  // store 4 pixels of RGB565
-    lea       edx, [edx + 8]
-    sub       ecx, 4
-    jg        convertloop
-    ret
-  }
-}
-
-// TODO(fbarchard): Improve sign extension/packing.
-__declspec(naked) __declspec(align(16))
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
-  __asm {
-    mov       eax, [esp + 4]   // src_argb
-    mov       edx, [esp + 8]   // dst_rgb
-    mov       ecx, [esp + 12]  // pix
-    pcmpeqb   xmm4, xmm4       // generate mask 0x0000001f
-    psrld     xmm4, 27
-    movdqa    xmm5, xmm4       // generate mask 0x000003e0
-    pslld     xmm5, 5
-    movdqa    xmm6, xmm4       // generate mask 0x00007c00
-    pslld     xmm6, 10
-    pcmpeqb   xmm7, xmm7       // generate mask 0xffff8000
-    pslld     xmm7, 15
-
-    align      4
- convertloop:
-    movdqa    xmm0, [eax]   // fetch 4 pixels of argb
-    movdqa    xmm1, xmm0    // B
-    movdqa    xmm2, xmm0    // G
-    movdqa    xmm3, xmm0    // R
-    psrad     xmm0, 16      // A
-    psrld     xmm1, 3       // B
-    psrld     xmm2, 6       // G
-    psrld     xmm3, 9       // R
-    pand      xmm0, xmm7    // A
-    pand      xmm1, xmm4    // B
-    pand      xmm2, xmm5    // G
-    pand      xmm3, xmm6    // R
-    por       xmm0, xmm1    // BA
-    por       xmm2, xmm3    // GR
-    por       xmm0, xmm2    // BGRA
-    packssdw  xmm0, xmm0
-    lea       eax, [eax + 16]
-    movq      qword ptr [edx], xmm0  // store 4 pixels of ARGB1555
-    lea       edx, [edx + 8]
-    sub       ecx, 4
-    jg        convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
-  __asm {
-    mov       eax, [esp + 4]   // src_argb
-    mov       edx, [esp + 8]   // dst_rgb
-    mov       ecx, [esp + 12]  // pix
-    pcmpeqb   xmm4, xmm4       // generate mask 0xf000f000
-    psllw     xmm4, 12
-    movdqa    xmm3, xmm4       // generate mask 0x00f000f0
-    psrlw     xmm3, 8
-
-    align      4
- convertloop:
-    movdqa    xmm0, [eax]   // fetch 4 pixels of argb
-    movdqa    xmm1, xmm0
-    pand      xmm0, xmm3    // low nibble
-    pand      xmm1, xmm4    // high nibble
-    psrl      xmm0, 4
-    psrl      xmm1, 8
-    por       xmm0, xmm1
-    packuswb  xmm0, xmm0
-    lea       eax, [eax + 16]
-    movq      qword ptr [edx], xmm0  // store 4 pixels of ARGB4444
-    lea       edx, [edx + 8]
-    sub       ecx, 4
-    jg        convertloop
-    ret
-  }
-}
-
-// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kARGBToY
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
-__declspec(naked) __declspec(align(16))
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm4, kARGBToYJ
-    movdqa     xmm5, kAddYJ64
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    paddw      xmm0, xmm5  // Add .5 for rounding.
-    paddw      xmm2, xmm5
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-#ifdef HAS_ARGBTOYROW_AVX2
-// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) __declspec(align(32))
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    vbroadcastf128 ymm4, kARGBToY
-    vbroadcastf128 ymm5, kAddY16
-    vmovdqa    ymm6, kPermdARGBToY_AVX
-
-    align      4
- convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    vmovdqu    ymm2, [eax + 64]
-    vmovdqu    ymm3, [eax + 96]
-    vpmaddubsw ymm0, ymm0, ymm4
-    vpmaddubsw ymm1, ymm1, ymm4
-    vpmaddubsw ymm2, ymm2, ymm4
-    vpmaddubsw ymm3, ymm3, ymm4
-    lea        eax, [eax + 128]
-    vphaddw    ymm0, ymm0, ymm1  // mutates.
-    vphaddw    ymm2, ymm2, ymm3
-    vpsrlw     ymm0, ymm0, 7
-    vpsrlw     ymm2, ymm2, 7
-    vpackuswb  ymm0, ymm0, ymm2  // mutates.
-    vpermd     ymm0, ymm6, ymm0  // For vphaddw + vpackuswb mutation.
-    vpaddb     ymm0, ymm0, ymm5
-    sub        ecx, 32
-    vmovdqu    [edx], ymm0
-    lea        edx, [edx + 32]
-    jg         convertloop
-    vzeroupper
-    ret
-  }
-}
-#endif  //  HAS_ARGBTOYROW_AVX2
-
-#ifdef HAS_ARGBTOYROW_AVX2
-// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
-__declspec(naked) __declspec(align(32))
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    vbroadcastf128 ymm4, kARGBToYJ
-    vbroadcastf128 ymm5, kAddYJ64
-    vmovdqa    ymm6, kPermdARGBToY_AVX
-
-    align      4
- convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    vmovdqu    ymm2, [eax + 64]
-    vmovdqu    ymm3, [eax + 96]
-    vpmaddubsw ymm0, ymm0, ymm4
-    vpmaddubsw ymm1, ymm1, ymm4
-    vpmaddubsw ymm2, ymm2, ymm4
-    vpmaddubsw ymm3, ymm3, ymm4
-    lea        eax, [eax + 128]
-    vphaddw    ymm0, ymm0, ymm1  // mutates.
-    vphaddw    ymm2, ymm2, ymm3
-    vpaddw     ymm0, ymm0, ymm5  // Add .5 for rounding.
-    vpaddw     ymm2, ymm2, ymm5
-    vpsrlw     ymm0, ymm0, 7
-    vpsrlw     ymm2, ymm2, 7
-    vpackuswb  ymm0, ymm0, ymm2  // mutates.
-    vpermd     ymm0, ymm6, ymm0  // For vphaddw + vpackuswb mutation.
-    sub        ecx, 32
-    vmovdqu    [edx], ymm0
-    lea        edx, [edx + 32]
-    jg         convertloop
-
-    vzeroupper
-    ret
-  }
-}
-#endif  //  HAS_ARGBTOYJROW_AVX2
-
-__declspec(naked) __declspec(align(16))
-void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kARGBToY
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm4, kARGBToYJ
-    movdqa     xmm5, kAddYJ64
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    paddw      xmm0, xmm5
-    paddw      xmm2, xmm5
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kBGRAToY
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kBGRAToY
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kABGRToY
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kABGRToY
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kRGBAToY
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_y */
-    mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm5, kAddY16
-    movdqa     xmm4, kRGBAToY
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm2, xmm4
-    pmaddubsw  xmm3, xmm4
-    lea        eax, [eax + 64]
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psrlw      xmm0, 7
-    psrlw      xmm2, 7
-    packuswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kARGBToU
-    movdqa     xmm6, kARGBToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pavgb      xmm0, [eax + esi]
-    pavgb      xmm1, [eax + esi + 16]
-    pavgb      xmm2, [eax + esi + 32]
-    pavgb      xmm3, [eax + esi + 48]
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                        uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kARGBToUJ
-    movdqa     xmm6, kARGBToVJ
-    movdqa     xmm5, kAddUVJ128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pavgb      xmm0, [eax + esi]
-    pavgb      xmm1, [eax + esi + 16]
-    pavgb      xmm2, [eax + esi + 32]
-    pavgb      xmm3, [eax + esi + 48]
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    paddw      xmm0, xmm5            // +.5 rounding -> unsigned
-    paddw      xmm1, xmm5
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-#ifdef HAS_ARGBTOUVROW_AVX2
-__declspec(naked) __declspec(align(32))
-void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    vbroadcastf128 ymm5, kAddUV128
-    vbroadcastf128 ymm6, kARGBToV
-    vbroadcastf128 ymm7, kARGBToU
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 32x2 argb pixels to 16x1 */
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    vmovdqu    ymm2, [eax + 64]
-    vmovdqu    ymm3, [eax + 96]
-    vpavgb     ymm0, ymm0, [eax + esi]
-    vpavgb     ymm1, ymm1, [eax + esi + 32]
-    vpavgb     ymm2, ymm2, [eax + esi + 64]
-    vpavgb     ymm3, ymm3, [eax + esi + 96]
-    lea        eax,  [eax + 128]
-    vshufps    ymm4, ymm0, ymm1, 0x88
-    vshufps    ymm0, ymm0, ymm1, 0xdd
-    vpavgb     ymm0, ymm0, ymm4  // mutated by vshufps
-    vshufps    ymm4, ymm2, ymm3, 0x88
-    vshufps    ymm2, ymm2, ymm3, 0xdd
-    vpavgb     ymm2, ymm2, ymm4  // mutated by vshufps
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 32 different pixels, its 16 pixels of U and 16 of V
-    vpmaddubsw ymm1, ymm0, ymm7  // U
-    vpmaddubsw ymm3, ymm2, ymm7
-    vpmaddubsw ymm0, ymm0, ymm6  // V
-    vpmaddubsw ymm2, ymm2, ymm6
-    vphaddw    ymm1, ymm1, ymm3  // mutates
-    vphaddw    ymm0, ymm0, ymm2
-    vpsraw     ymm1, ymm1, 8
-    vpsraw     ymm0, ymm0, 8
-    vpacksswb  ymm0, ymm1, ymm0  // mutates
-    vpermq     ymm0, ymm0, 0xd8  // For vpacksswb
-    vpshufb    ymm0, ymm0, kShufARGBToUV_AVX  // For vshufps + vphaddw
-    vpaddb     ymm0, ymm0, ymm5  // -> unsigned
-
-    // step 3 - store 16 U and 16 V values
-    sub         ecx, 32
-    vextractf128 [edx], ymm0, 0 // U
-    vextractf128 [edx + edi], ymm0, 1 // V
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBTOUVROW_AVX2
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kARGBToU
-    movdqa     xmm6, kARGBToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    movdqu     xmm4, [eax + esi]
-    pavgb      xmm0, xmm4
-    movdqu     xmm4, [eax + esi + 16]
-    pavgb      xmm1, xmm4
-    movdqu     xmm4, [eax + esi + 32]
-    pavgb      xmm2, xmm4
-    movdqu     xmm4, [eax + esi + 48]
-    pavgb      xmm3, xmm4
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kARGBToUJ
-    movdqa     xmm6, kARGBToVJ
-    movdqa     xmm5, kAddUVJ128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    movdqu     xmm4, [eax + esi]
-    pavgb      xmm0, xmm4
-    movdqu     xmm4, [eax + esi + 16]
-    pavgb      xmm1, xmm4
-    movdqu     xmm4, [eax + esi + 32]
-    pavgb      xmm2, xmm4
-    movdqu     xmm4, [eax + esi + 48]
-    pavgb      xmm3, xmm4
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    paddw      xmm0, xmm5            // +.5 rounding -> unsigned
-    paddw      xmm1, xmm5
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
-                          uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]   // src_argb
-    mov        edx, [esp + 4 + 8]   // dst_u
-    mov        edi, [esp + 4 + 12]  // dst_v
-    mov        ecx, [esp + 4 + 16]  // pix
-    movdqa     xmm7, kARGBToU
-    movdqa     xmm6, kARGBToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* convert to U and V */
-    movdqa     xmm0, [eax]          // U
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm7
-    pmaddubsw  xmm1, xmm7
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm3, xmm7
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psraw      xmm0, 8
-    psraw      xmm2, 8
-    packsswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx,  16
-    movdqa     [edx], xmm0
-
-    movdqa     xmm0, [eax]          // V
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm6
-    pmaddubsw  xmm1, xmm6
-    pmaddubsw  xmm2, xmm6
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psraw      xmm0, 8
-    psraw      xmm2, 8
-    packsswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    lea        eax,  [eax + 64]
-    movdqa     [edx + edi], xmm0
-    lea        edx,  [edx + 16]
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
-                                    uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]   // src_argb
-    mov        edx, [esp + 4 + 8]   // dst_u
-    mov        edi, [esp + 4 + 12]  // dst_v
-    mov        ecx, [esp + 4 + 16]  // pix
-    movdqa     xmm7, kARGBToU
-    movdqa     xmm6, kARGBToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* convert to U and V */
-    movdqu     xmm0, [eax]          // U
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm7
-    pmaddubsw  xmm1, xmm7
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm3, xmm7
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psraw      xmm0, 8
-    psraw      xmm2, 8
-    packsswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    sub        ecx,  16
-    movdqu     [edx], xmm0
-
-    movdqu     xmm0, [eax]          // V
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    pmaddubsw  xmm0, xmm6
-    pmaddubsw  xmm1, xmm6
-    pmaddubsw  xmm2, xmm6
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm1
-    phaddw     xmm2, xmm3
-    psraw      xmm0, 8
-    psraw      xmm2, 8
-    packsswb   xmm0, xmm2
-    paddb      xmm0, xmm5
-    lea        eax,  [eax + 64]
-    movdqu     [edx + edi], xmm0
-    lea        edx,  [edx + 16]
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
-                          uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]   // src_argb
-    mov        edx, [esp + 4 + 8]   // dst_u
-    mov        edi, [esp + 4 + 12]  // dst_v
-    mov        ecx, [esp + 4 + 16]  // pix
-    movdqa     xmm7, kARGBToU
-    movdqa     xmm6, kARGBToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0,
-                                    uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]   // src_argb
-    mov        edx, [esp + 4 + 8]   // dst_u
-    mov        edi, [esp + 4 + 12]  // dst_v
-    mov        ecx, [esp + 4 + 16]  // pix
-    movdqa     xmm7, kARGBToU
-    movdqa     xmm6, kARGBToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kBGRAToU
-    movdqa     xmm6, kBGRAToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pavgb      xmm0, [eax + esi]
-    pavgb      xmm1, [eax + esi + 16]
-    pavgb      xmm2, [eax + esi + 32]
-    pavgb      xmm3, [eax + esi + 48]
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kBGRAToU
-    movdqa     xmm6, kBGRAToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    movdqu     xmm4, [eax + esi]
-    pavgb      xmm0, xmm4
-    movdqu     xmm4, [eax + esi + 16]
-    pavgb      xmm1, xmm4
-    movdqu     xmm4, [eax + esi + 32]
-    pavgb      xmm2, xmm4
-    movdqu     xmm4, [eax + esi + 48]
-    pavgb      xmm3, xmm4
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kABGRToU
-    movdqa     xmm6, kABGRToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pavgb      xmm0, [eax + esi]
-    pavgb      xmm1, [eax + esi + 16]
-    pavgb      xmm2, [eax + esi + 32]
-    pavgb      xmm3, [eax + esi + 48]
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kABGRToU
-    movdqa     xmm6, kABGRToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    movdqu     xmm4, [eax + esi]
-    pavgb      xmm0, xmm4
-    movdqu     xmm4, [eax + esi + 16]
-    pavgb      xmm1, xmm4
-    movdqu     xmm4, [eax + esi + 32]
-    pavgb      xmm2, xmm4
-    movdqu     xmm4, [eax + esi + 48]
-    pavgb      xmm3, xmm4
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                       uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kRGBAToU
-    movdqa     xmm6, kRGBAToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-    pavgb      xmm0, [eax + esi]
-    pavgb      xmm1, [eax + esi + 16]
-    pavgb      xmm2, [eax + esi + 32]
-    pavgb      xmm3, [eax + esi + 48]
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
-                                 uint8* dst_u, uint8* dst_v, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb
-    mov        esi, [esp + 8 + 8]   // src_stride_argb
-    mov        edx, [esp + 8 + 12]  // dst_u
-    mov        edi, [esp + 8 + 16]  // dst_v
-    mov        ecx, [esp + 8 + 20]  // pix
-    movdqa     xmm7, kRGBAToU
-    movdqa     xmm6, kRGBAToV
-    movdqa     xmm5, kAddUV128
-    sub        edi, edx             // stride from u to v
-
-    align      4
- convertloop:
-    /* step 1 - subsample 16x2 argb pixels to 8x1 */
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + 32]
-    movdqu     xmm3, [eax + 48]
-    movdqu     xmm4, [eax + esi]
-    pavgb      xmm0, xmm4
-    movdqu     xmm4, [eax + esi + 16]
-    pavgb      xmm1, xmm4
-    movdqu     xmm4, [eax + esi + 32]
-    pavgb      xmm2, xmm4
-    movdqu     xmm4, [eax + esi + 48]
-    pavgb      xmm3, xmm4
-    lea        eax,  [eax + 64]
-    movdqa     xmm4, xmm0
-    shufps     xmm0, xmm1, 0x88
-    shufps     xmm4, xmm1, 0xdd
-    pavgb      xmm0, xmm4
-    movdqa     xmm4, xmm2
-    shufps     xmm2, xmm3, 0x88
-    shufps     xmm4, xmm3, 0xdd
-    pavgb      xmm2, xmm4
-
-    // step 2 - convert to U and V
-    // from here down is very similar to Y code except
-    // instead of 16 different pixels, its 8 pixels of U and 8 of V
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    pmaddubsw  xmm0, xmm7  // U
-    pmaddubsw  xmm2, xmm7
-    pmaddubsw  xmm1, xmm6  // V
-    pmaddubsw  xmm3, xmm6
-    phaddw     xmm0, xmm2
-    phaddw     xmm1, xmm3
-    psraw      xmm0, 8
-    psraw      xmm1, 8
-    packsswb   xmm0, xmm1
-    paddb      xmm0, xmm5            // -> unsigned
-
-    // step 3 - store 8 U and 8 V values
-    sub        ecx, 16
-    movlps     qword ptr [edx], xmm0 // U
-    movhps     qword ptr [edx + edi], xmm0 // V
-    lea        edx, [edx + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBTOYROW_SSSE3
-
-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
-#ifdef HAS_I422TOARGBROW_AVX2
-
-static const lvec8 kUVToB_AVX = {
-  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
-  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-static const lvec8 kUVToR_AVX = {
-  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR,
-  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-static const lvec8 kUVToG_AVX = {
-  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
-  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-static const lvec16 kYToRgb_AVX = {
-  YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
-};
-static const lvec16 kYSub16_AVX = {
-  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
-};
-static const lvec16 kUVBiasB_AVX = {
-  BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
-};
-static const lvec16 kUVBiasG_AVX = {
-  BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG
-};
-static const lvec16 kUVBiasR_AVX = {
-  BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
-};
-
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToARGBRow_AVX2(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_argb,
-                         int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    vpcmpeqb   ymm5, ymm5, ymm5     // generate 0xffffffffffffffff for alpha
-    vpxor      ymm4, ymm4, ymm4
-
-    align      4
- convertloop:
-    vmovq      xmm0, qword ptr [esi]          //  U
-    vmovq      xmm1, qword ptr [esi + edi]    //  V
-    lea        esi,  [esi + 8]
-    vpunpcklbw ymm0, ymm0, ymm1               // UV
-    vpermq     ymm0, ymm0, 0xd8
-    vpunpcklwd ymm0, ymm0, ymm0              // UVUV
-    vpmaddubsw ymm2, ymm0, kUVToB_AVX        // scale B UV
-    vpmaddubsw ymm1, ymm0, kUVToG_AVX        // scale G UV
-    vpmaddubsw ymm0, ymm0, kUVToR_AVX        // scale R UV
-    vpsubw     ymm2, ymm2, kUVBiasB_AVX      // unbias back to signed
-    vpsubw     ymm1, ymm1, kUVBiasG_AVX
-    vpsubw     ymm0, ymm0, kUVBiasR_AVX
-
-    // Step 2: Find Y contribution to 16 R,G,B values
-    vmovdqu    xmm3, [eax]                  // NOLINT
-    lea        eax, [eax + 16]
-    vpermq     ymm3, ymm3, 0xd8
-    vpunpcklbw ymm3, ymm3, ymm4
-    vpsubsw    ymm3, ymm3, kYSub16_AVX
-    vpmullw    ymm3, ymm3, kYToRgb_AVX
-    vpaddsw    ymm2, ymm2, ymm3           // B += Y
-    vpaddsw    ymm1, ymm1, ymm3           // G += Y
-    vpaddsw    ymm0, ymm0, ymm3           // R += Y
-    vpsraw     ymm2, ymm2, 6
-    vpsraw     ymm1, ymm1, 6
-    vpsraw     ymm0, ymm0, 6
-    vpackuswb  ymm2, ymm2, ymm2           // B
-    vpackuswb  ymm1, ymm1, ymm1           // G
-    vpackuswb  ymm0, ymm0, ymm0           // R
-
-    // Step 3: Weave into ARGB
-    vpunpcklbw ymm2, ymm2, ymm1           // BG
-    vpermq     ymm2, ymm2, 0xd8
-    vpunpcklbw ymm0, ymm0, ymm5           // RA
-    vpermq     ymm0, ymm0, 0xd8
-    vpunpcklwd ymm1, ymm2, ymm0           // BGRA first 8 pixels
-    vpunpckhwd ymm2, ymm2, ymm0           // BGRA next 8 pixels
-    vmovdqu    [edx], ymm1
-    vmovdqu    [edx + 32], ymm2
-    lea        edx,  [edx + 64]
-    sub        ecx, 16
-    jg         convertloop
-    vzeroupper
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_I422TOARGBROW_AVX2
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-
-static const vec8 kUVToB = {
-  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-
-static const vec8 kUVToR = {
-  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-
-static const vec8 kUVToG = {
-  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-
-static const vec8 kVUToB = {
-  VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
-};
-
-static const vec8 kVUToR = {
-  VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
-};
-
-static const vec8 kVUToG = {
-  VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
-};
-
-static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
-static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
-static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
-static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
-static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
-
-// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
-
-// Read 8 UV from 444.
-#define READYUV444 __asm {                                                     \
-    __asm movq       xmm0, qword ptr [esi] /* U */                /* NOLINT */ \
-    __asm movq       xmm1, qword ptr [esi + edi] /* V */          /* NOLINT */ \
-    __asm lea        esi,  [esi + 8]                                           \
-    __asm punpcklbw  xmm0, xmm1           /* UV */                             \
-  }
-
-// Read 4 UV from 422, upsample to 8 UV.
-#define READYUV422 __asm {                                                     \
-    __asm movd       xmm0, [esi]          /* U */                              \
-    __asm movd       xmm1, [esi + edi]    /* V */                              \
-    __asm lea        esi,  [esi + 4]                                           \
-    __asm punpcklbw  xmm0, xmm1           /* UV */                             \
-    __asm punpcklwd  xmm0, xmm0           /* UVUV (upsample) */                \
-  }
-
-// Read 2 UV from 411, upsample to 8 UV.
-#define READYUV411 __asm {                                                     \
-    __asm movzx      ebx, word ptr [esi]        /* U */           /* NOLINT */ \
-    __asm movd       xmm0, ebx                                                 \
-    __asm movzx      ebx, word ptr [esi + edi]  /* V */           /* NOLINT */ \
-    __asm movd       xmm1, ebx                                                 \
-    __asm lea        esi,  [esi + 2]                                           \
-    __asm punpcklbw  xmm0, xmm1           /* UV */                             \
-    __asm punpcklwd  xmm0, xmm0           /* UVUV (upsample) */                \
-    __asm punpckldq  xmm0, xmm0           /* UVUV (upsample) */                \
-  }
-
-// Read 4 UV from NV12, upsample to 8 UV.
-#define READNV12 __asm {                                                       \
-    __asm movq       xmm0, qword ptr [esi] /* UV */               /* NOLINT */ \
-    __asm lea        esi,  [esi + 8]                                           \
-    __asm punpcklwd  xmm0, xmm0           /* UVUV (upsample) */                \
-  }
-
-// Convert 8 pixels: 8 UV and 8 Y.
-#define YUVTORGB __asm {                                                       \
-    /* Step 1: Find 4 UV contributions to 8 R,G,B values */                    \
-    __asm movdqa     xmm1, xmm0                                                \
-    __asm movdqa     xmm2, xmm0                                                \
-    __asm pmaddubsw  xmm0, kUVToB        /* scale B UV */                      \
-    __asm pmaddubsw  xmm1, kUVToG        /* scale G UV */                      \
-    __asm pmaddubsw  xmm2, kUVToR        /* scale R UV */                      \
-    __asm psubw      xmm0, kUVBiasB      /* unbias back to signed */           \
-    __asm psubw      xmm1, kUVBiasG                                            \
-    __asm psubw      xmm2, kUVBiasR                                            \
-    /* Step 2: Find Y contribution to 8 R,G,B values */                        \
-    __asm movq       xmm3, qword ptr [eax]                        /* NOLINT */ \
-    __asm lea        eax, [eax + 8]                                            \
-    __asm punpcklbw  xmm3, xmm4                                                \
-    __asm psubsw     xmm3, kYSub16                                             \
-    __asm pmullw     xmm3, kYToRgb                                             \
-    __asm paddsw     xmm0, xmm3           /* B += Y */                         \
-    __asm paddsw     xmm1, xmm3           /* G += Y */                         \
-    __asm paddsw     xmm2, xmm3           /* R += Y */                         \
-    __asm psraw      xmm0, 6                                                   \
-    __asm psraw      xmm1, 6                                                   \
-    __asm psraw      xmm2, 6                                                   \
-    __asm packuswb   xmm0, xmm0           /* B */                              \
-    __asm packuswb   xmm1, xmm1           /* G */                              \
-    __asm packuswb   xmm2, xmm2           /* R */                              \
-  }
-
-// Convert 8 pixels: 8 VU and 8 Y.
-#define YVUTORGB __asm {                                                       \
-    /* Step 1: Find 4 UV contributions to 8 R,G,B values */                    \
-    __asm movdqa     xmm1, xmm0                                                \
-    __asm movdqa     xmm2, xmm0                                                \
-    __asm pmaddubsw  xmm0, kVUToB        /* scale B UV */                      \
-    __asm pmaddubsw  xmm1, kVUToG        /* scale G UV */                      \
-    __asm pmaddubsw  xmm2, kVUToR        /* scale R UV */                      \
-    __asm psubw      xmm0, kUVBiasB      /* unbias back to signed */           \
-    __asm psubw      xmm1, kUVBiasG                                            \
-    __asm psubw      xmm2, kUVBiasR                                            \
-    /* Step 2: Find Y contribution to 8 R,G,B values */                        \
-    __asm movq       xmm3, qword ptr [eax]                        /* NOLINT */ \
-    __asm lea        eax, [eax + 8]                                            \
-    __asm punpcklbw  xmm3, xmm4                                                \
-    __asm psubsw     xmm3, kYSub16                                             \
-    __asm pmullw     xmm3, kYToRgb                                             \
-    __asm paddsw     xmm0, xmm3           /* B += Y */                         \
-    __asm paddsw     xmm1, xmm3           /* G += Y */                         \
-    __asm paddsw     xmm2, xmm3           /* R += Y */                         \
-    __asm psraw      xmm0, 6                                                   \
-    __asm psraw      xmm1, 6                                                   \
-    __asm psraw      xmm2, 6                                                   \
-    __asm packuswb   xmm0, xmm0           /* B */                              \
-    __asm packuswb   xmm1, xmm1           /* G */                              \
-    __asm packuswb   xmm2, xmm2           /* R */                              \
-  }
-
-// 8 pixels, dest aligned 16.
-// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I444ToARGBRow_SSSE3(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_argb,
-                         int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV444
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToRGB24Row_SSSE3(const uint8* y_buf,
-                          const uint8* u_buf,
-                          const uint8* v_buf,
-                          uint8* dst_rgb24,
-                          int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // rgb24
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pxor       xmm4, xmm4
-    movdqa     xmm5, kShuffleMaskARGBToRGB24_0
-    movdqa     xmm6, kShuffleMaskARGBToRGB24
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into RRGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm2           // RR
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
-    pshufb     xmm0, xmm5           // Pack into first 8 and last 4 bytes.
-    pshufb     xmm1, xmm6           // Pack into first 12 bytes.
-    palignr    xmm1, xmm0, 12       // last 4 bytes of xmm0 + 12 from xmm1
-    movq       qword ptr [edx], xmm0  // First 8 bytes
-    movdqu     [edx + 8], xmm1      // Last 16 bytes. = 24 bytes, 8 RGB pixels.
-    lea        edx,  [edx + 24]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToRAWRow_SSSE3(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* dst_raw,
-                        int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // raw
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pxor       xmm4, xmm4
-    movdqa     xmm5, kShuffleMaskARGBToRAW_0
-    movdqa     xmm6, kShuffleMaskARGBToRAW
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into RRGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm2           // RR
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
-    pshufb     xmm0, xmm5           // Pack into first 8 and last 4 bytes.
-    pshufb     xmm1, xmm6           // Pack into first 12 bytes.
-    palignr    xmm1, xmm0, 12       // last 4 bytes of xmm0 + 12 from xmm1
-    movq       qword ptr [edx], xmm0  // First 8 bytes
-    movdqu     [edx + 8], xmm1      // Last 16 bytes. = 24 bytes, 8 RGB pixels.
-    lea        edx,  [edx + 24]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, dest unaligned.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToRGB565Row_SSSE3(const uint8* y_buf,
-                           const uint8* u_buf,
-                           const uint8* v_buf,
-                           uint8* rgb565_buf,
-                           int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // rgb565
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pxor       xmm4, xmm4
-    pcmpeqb    xmm5, xmm5       // generate mask 0x0000001f
-    psrld      xmm5, 27
-    pcmpeqb    xmm6, xmm6       // generate mask 0x000007e0
-    psrld      xmm6, 26
-    pslld      xmm6, 5
-    pcmpeqb    xmm7, xmm7       // generate mask 0xfffff800
-    pslld      xmm7, 11
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into RRGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm2           // RR
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRR first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRR next 4 pixels
-
-    // Step 3b: RRGB -> RGB565
-    movdqa     xmm3, xmm0    // B  first 4 pixels of argb
-    movdqa     xmm2, xmm0    // G
-    pslld      xmm0, 8       // R
-    psrld      xmm3, 3       // B
-    psrld      xmm2, 5       // G
-    psrad      xmm0, 16      // R
-    pand       xmm3, xmm5    // B
-    pand       xmm2, xmm6    // G
-    pand       xmm0, xmm7    // R
-    por        xmm3, xmm2    // BG
-    por        xmm0, xmm3    // BGR
-    movdqa     xmm3, xmm1    // B  next 4 pixels of argb
-    movdqa     xmm2, xmm1    // G
-    pslld      xmm1, 8       // R
-    psrld      xmm3, 3       // B
-    psrld      xmm2, 5       // G
-    psrad      xmm1, 16      // R
-    pand       xmm3, xmm5    // B
-    pand       xmm2, xmm6    // G
-    pand       xmm1, xmm7    // R
-    por        xmm3, xmm2    // BG
-    por        xmm1, xmm3    // BGR
-    packssdw   xmm0, xmm1
-    sub        ecx, 8
-    movdqu     [edx], xmm0   // store 8 pixels of RGB565
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToARGBRow_SSSE3(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_argb,
-                         int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
-void I411ToARGBRow_SSSE3(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_argb,
-                         int width) {
-  __asm {
-    push       ebx
-    push       esi
-    push       edi
-    mov        eax, [esp + 12 + 4]   // Y
-    mov        esi, [esp + 12 + 8]   // U
-    mov        edi, [esp + 12 + 12]  // V
-    mov        edx, [esp + 12 + 16]  // argb
-    mov        ecx, [esp + 12 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV411  // modifies EBX
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    pop        ebx
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV12ToARGBRow_SSSE3(const uint8* y_buf,
-                         const uint8* uv_buf,
-                         uint8* dst_argb,
-                         int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // UV
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READNV12
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV21ToARGBRow_SSSE3(const uint8* y_buf,
-                         const uint8* uv_buf,
-                         uint8* dst_argb,
-                         int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // VU
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READNV12
-    YVUTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, unaligned.
-// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* u_buf,
-                                   const uint8* v_buf,
-                                   uint8* dst_argb,
-                                   int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV444
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqu     [edx], xmm0
-    movdqu     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, unaligned.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* u_buf,
-                                   const uint8* v_buf,
-                                   uint8* dst_argb,
-                                   int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // argb
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqu     [edx], xmm0
-    movdqu     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, unaligned.
-// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-// Similar to I420 but duplicate UV once more.
-__declspec(naked) __declspec(align(16))
-void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* u_buf,
-                                   const uint8* v_buf,
-                                   uint8* dst_argb,
-                                   int width) {
-  __asm {
-    push       ebx
-    push       esi
-    push       edi
-    mov        eax, [esp + 12 + 4]   // Y
-    mov        esi, [esp + 12 + 8]   // U
-    mov        edi, [esp + 12 + 12]  // V
-    mov        edx, [esp + 12 + 16]  // argb
-    mov        ecx, [esp + 12 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV411  // modifies EBX
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqu     [edx], xmm0
-    movdqu     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    pop        ebx
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* uv_buf,
-                                   uint8* dst_argb,
-                                   int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // UV
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READNV12
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqu     [edx], xmm0
-    movdqu     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-
-// 8 pixels, dest aligned 16.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked) __declspec(align(16))
-void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* uv_buf,
-                                   uint8* dst_argb,
-                                   int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // Y
-    mov        esi, [esp + 4 + 8]   // VU
-    mov        edx, [esp + 4 + 12]  // argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READNV12
-    YVUTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm0, xmm1           // BG
-    punpcklbw  xmm2, xmm5           // RA
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm2           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm2           // BGRA next 4 pixels
-    movdqu     [edx], xmm0
-    movdqu     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToBGRARow_SSSE3(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_bgra,
-                         int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // bgra
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into BGRA
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    punpcklbw  xmm1, xmm0           // GB
-    punpcklbw  xmm5, xmm2           // AR
-    movdqa     xmm0, xmm5
-    punpcklwd  xmm5, xmm1           // BGRA first 4 pixels
-    punpckhwd  xmm0, xmm1           // BGRA next 4 pixels
-    movdqa     [edx], xmm5
-    movdqa     [edx + 16], xmm0
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* u_buf,
-                                   const uint8* v_buf,
-                                   uint8* dst_bgra,
-                                   int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // bgra
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into BGRA
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    punpcklbw  xmm1, xmm0           // GB
-    punpcklbw  xmm5, xmm2           // AR
-    movdqa     xmm0, xmm5
-    punpcklwd  xmm5, xmm1           // BGRA first 4 pixels
-    punpckhwd  xmm0, xmm1           // BGRA next 4 pixels
-    movdqu     [edx], xmm5
-    movdqu     [edx + 16], xmm0
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToABGRRow_SSSE3(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_abgr,
-                         int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // abgr
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm2, xmm1           // RG
-    punpcklbw  xmm0, xmm5           // BA
-    movdqa     xmm1, xmm2
-    punpcklwd  xmm2, xmm0           // RGBA first 4 pixels
-    punpckhwd  xmm1, xmm0           // RGBA next 4 pixels
-    movdqa     [edx], xmm2
-    movdqa     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* u_buf,
-                                   const uint8* v_buf,
-                                   uint8* dst_abgr,
-                                   int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // abgr
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into ARGB
-    punpcklbw  xmm2, xmm1           // RG
-    punpcklbw  xmm0, xmm5           // BA
-    movdqa     xmm1, xmm2
-    punpcklwd  xmm2, xmm0           // RGBA first 4 pixels
-    punpckhwd  xmm1, xmm0           // RGBA next 4 pixels
-    movdqu     [edx], xmm2
-    movdqu     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToRGBARow_SSSE3(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* dst_rgba,
-                         int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // rgba
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into RGBA
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    punpcklbw  xmm1, xmm2           // GR
-    punpcklbw  xmm5, xmm0           // AB
-    movdqa     xmm0, xmm5
-    punpcklwd  xmm5, xmm1           // RGBA first 4 pixels
-    punpckhwd  xmm0, xmm1           // RGBA next 4 pixels
-    movdqa     [edx], xmm5
-    movdqa     [edx + 16], xmm0
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
-                                   const uint8* u_buf,
-                                   const uint8* v_buf,
-                                   uint8* dst_rgba,
-                                   int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // Y
-    mov        esi, [esp + 8 + 8]   // U
-    mov        edi, [esp + 8 + 12]  // V
-    mov        edx, [esp + 8 + 16]  // rgba
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        edi, esi
-    pxor       xmm4, xmm4
-
-    align      4
- convertloop:
-    READYUV422
-    YUVTORGB
-
-    // Step 3: Weave into RGBA
-    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
-    punpcklbw  xmm1, xmm2           // GR
-    punpcklbw  xmm5, xmm0           // AB
-    movdqa     xmm0, xmm5
-    punpcklwd  xmm5, xmm1           // RGBA first 4 pixels
-    punpckhwd  xmm0, xmm1           // RGBA next 4 pixels
-    movdqu     [edx], xmm5
-    movdqu     [edx + 16], xmm0
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-#endif  // HAS_I422TOARGBROW_SSSE3
-
-#ifdef HAS_YTOARGBROW_SSE2
-__declspec(naked) __declspec(align(16))
-void YToARGBRow_SSE2(const uint8* y_buf,
-                     uint8* rgb_buf,
-                     int width) {
-  __asm {
-    pxor       xmm5, xmm5
-    pcmpeqb    xmm4, xmm4           // generate mask 0xff000000
-    pslld      xmm4, 24
-    mov        eax, 0x00100010
-    movd       xmm3, eax
-    pshufd     xmm3, xmm3, 0
-    mov        eax, 0x004a004a       // 74
-    movd       xmm2, eax
-    pshufd     xmm2, xmm2,0
-    mov        eax, [esp + 4]       // Y
-    mov        edx, [esp + 8]       // rgb
-    mov        ecx, [esp + 12]      // width
-
-    align      4
- convertloop:
-    // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
-    movq       xmm0, qword ptr [eax]
-    lea        eax, [eax + 8]
-    punpcklbw  xmm0, xmm5           // 0.Y
-    psubusw    xmm0, xmm3
-    pmullw     xmm0, xmm2
-    psrlw      xmm0, 6
-    packuswb   xmm0, xmm0           // G
-
-    // Step 2: Weave into ARGB
-    punpcklbw  xmm0, xmm0           // GG
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm0           // BGRA first 4 pixels
-    punpckhwd  xmm1, xmm1           // BGRA next 4 pixels
-    por        xmm0, xmm4
-    por        xmm1, xmm4
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx,  [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    ret
-  }
-}
-#endif  // HAS_YTOARGBROW_SSE2
-
-#ifdef HAS_MIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static const uvec8 kShuffleMirror = {
-  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-__declspec(naked) __declspec(align(16))
-void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov       eax, [esp + 4]   // src
-    mov       edx, [esp + 8]   // dst
-    mov       ecx, [esp + 12]  // width
-    movdqa    xmm5, kShuffleMirror
-    lea       eax, [eax - 16]
-
-    align      4
- convertloop:
-    movdqa    xmm0, [eax + ecx]
-    pshufb    xmm0, xmm5
-    sub       ecx, 16
-    movdqa    [edx], xmm0
-    lea       edx, [edx + 16]
-    jg        convertloop
-    ret
-  }
-}
-#endif  // HAS_MIRRORROW_SSSE3
-
-#ifdef HAS_MIRRORROW_AVX2
-// Shuffle table for reversing the bytes.
-static const ulvec8 kShuffleMirror_AVX2 = {
-  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u,
-  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-__declspec(naked) __declspec(align(16))
-void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov       eax, [esp + 4]   // src
-    mov       edx, [esp + 8]   // dst
-    mov       ecx, [esp + 12]  // width
-    vmovdqa   ymm5, kShuffleMirror_AVX2
-    lea       eax, [eax - 32]
-
-    align      4
- convertloop:
-    vmovdqu   ymm0, [eax + ecx]
-    vpshufb   ymm0, ymm0, ymm5
-    vpermq    ymm0, ymm0, 0x4e  // swap high and low halfs
-    sub       ecx, 32
-    vmovdqu   [edx], ymm0
-    lea       edx, [edx + 32]
-    jg        convertloop
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_MIRRORROW_AVX2
-
-#ifdef HAS_MIRRORROW_SSE2
-// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
-// version can not.
-__declspec(naked) __declspec(align(16))
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov       eax, [esp + 4]   // src
-    mov       edx, [esp + 8]   // dst
-    mov       ecx, [esp + 12]  // width
-    lea       eax, [eax - 16]
-
-    align      4
- convertloop:
-    movdqu    xmm0, [eax + ecx]
-    movdqa    xmm1, xmm0        // swap bytes
-    psllw     xmm0, 8
-    psrlw     xmm1, 8
-    por       xmm0, xmm1
-    pshuflw   xmm0, xmm0, 0x1b  // swap words
-    pshufhw   xmm0, xmm0, 0x1b
-    pshufd    xmm0, xmm0, 0x4e  // swap qwords
-    sub       ecx, 16
-    movdqu    [edx], xmm0
-    lea       edx, [edx + 16]
-    jg        convertloop
-    ret
-  }
-}
-#endif  // HAS_MIRRORROW_SSE2
-
-#ifdef HAS_MIRRORROW_UV_SSSE3
-// Shuffle table for reversing the bytes of UV channels.
-static const uvec8 kShuffleMirrorUV = {
-  14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u
-};
-
-__declspec(naked) __declspec(align(16))
-void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
-                       int width) {
-  __asm {
-    push      edi
-    mov       eax, [esp + 4 + 4]   // src
-    mov       edx, [esp + 4 + 8]   // dst_u
-    mov       edi, [esp + 4 + 12]  // dst_v
-    mov       ecx, [esp + 4 + 16]  // width
-    movdqa    xmm1, kShuffleMirrorUV
-    lea       eax, [eax + ecx * 2 - 16]
-    sub       edi, edx
-
-    align      4
- convertloop:
-    movdqa    xmm0, [eax]
-    lea       eax, [eax - 16]
-    pshufb    xmm0, xmm1
-    sub       ecx, 8
-    movlpd    qword ptr [edx], xmm0
-    movhpd    qword ptr [edx + edi], xmm0
-    lea       edx, [edx + 8]
-    jg        convertloop
-
-    pop       edi
-    ret
-  }
-}
-#endif  // HAS_MIRRORROW_UV_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_SSSE3
-// Shuffle table for reversing the bytes.
-static const uvec8 kARGBShuffleMirror = {
-  12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
-};
-
-__declspec(naked) __declspec(align(16))
-void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov       eax, [esp + 4]   // src
-    mov       edx, [esp + 8]   // dst
-    mov       ecx, [esp + 12]  // width
-    lea       eax, [eax - 16 + ecx * 4]  // last 4 pixels.
-    movdqa    xmm5, kARGBShuffleMirror
-
-    align      4
- convertloop:
-    movdqa    xmm0, [eax]
-    lea       eax, [eax - 16]
-    pshufb    xmm0, xmm5
-    sub       ecx, 4
-    movdqa    [edx], xmm0
-    lea       edx, [edx + 16]
-    jg        convertloop
-    ret
-  }
-}
-#endif  // HAS_ARGBMIRRORROW_SSSE3
-
-#ifdef HAS_ARGBMIRRORROW_AVX2
-// Shuffle table for reversing the bytes.
-static const ulvec32 kARGBShuffleMirror_AVX2 = {
-  7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
-};
-
-__declspec(naked) __declspec(align(16))
-void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov       eax, [esp + 4]   // src
-    mov       edx, [esp + 8]   // dst
-    mov       ecx, [esp + 12]  // width
-    lea       eax, [eax - 32]
-    vmovdqa   ymm5, kARGBShuffleMirror_AVX2
-
-    align      4
- convertloop:
-    vpermd    ymm0, ymm5, [eax + ecx * 4]  // permute dword order
-    sub       ecx, 8
-    vmovdqu   [edx], ymm0
-    lea       edx, [edx + 32]
-    jg        convertloop
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBMIRRORROW_AVX2
-
-#ifdef HAS_SPLITUVROW_SSE2
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    movdqa     xmm2, xmm0
-    movdqa     xmm3, xmm1
-    pand       xmm0, xmm5   // even bytes
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    psrlw      xmm2, 8      // odd bytes
-    psrlw      xmm3, 8
-    packuswb   xmm2, xmm3
-    movdqa     [edx], xmm0
-    movdqa     [edx + edi], xmm2
-    lea        edx, [edx + 16]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                               int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    movdqa     xmm2, xmm0
-    movdqa     xmm3, xmm1
-    pand       xmm0, xmm5   // even bytes
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    psrlw      xmm2, 8      // odd bytes
-    psrlw      xmm3, 8
-    packuswb   xmm2, xmm3
-    movdqu     [edx], xmm0
-    movdqu     [edx + edi], xmm2
-    lea        edx, [edx + 16]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-#endif  // HAS_SPLITUVROW_SSE2
-
-#ifdef HAS_SPLITUVROW_AVX2
-__declspec(naked) __declspec(align(16))
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
-    vpsrlw     ymm5, ymm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    lea        eax,  [eax + 64]
-    vpsrlw     ymm2, ymm0, 8      // odd bytes
-    vpsrlw     ymm3, ymm1, 8
-    vpand      ymm0, ymm0, ymm5   // even bytes
-    vpand      ymm1, ymm1, ymm5
-    vpackuswb  ymm0, ymm0, ymm1
-    vpackuswb  ymm2, ymm2, ymm3
-    vpermq     ymm0, ymm0, 0xd8
-    vpermq     ymm2, ymm2, 0xd8
-    vmovdqu    [edx], ymm0
-    vmovdqu    [edx + edi], ymm2
-    lea        edx, [edx + 32]
-    sub        ecx, 32
-    jg         convertloop
-
-    pop        edi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_SPLITUVROW_AVX2
-
-#ifdef HAS_MERGEUVROW_SSE2
-__declspec(naked) __declspec(align(16))
-void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                     int width) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_u
-    mov        edx, [esp + 4 + 8]    // src_v
-    mov        edi, [esp + 4 + 12]   // dst_uv
-    mov        ecx, [esp + 4 + 16]   // width
-    sub        edx, eax
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]      // read 16 U's
-    movdqa     xmm1, [eax + edx]  // and 16 V's
-    lea        eax,  [eax + 16]
-    movdqa     xmm2, xmm0
-    punpcklbw  xmm0, xmm1       // first 8 UV pairs
-    punpckhbw  xmm2, xmm1       // next 8 UV pairs
-    movdqa     [edi], xmm0
-    movdqa     [edi + 16], xmm2
-    lea        edi, [edi + 32]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
-                               uint8* dst_uv, int width) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_u
-    mov        edx, [esp + 4 + 8]    // src_v
-    mov        edi, [esp + 4 + 12]   // dst_uv
-    mov        ecx, [esp + 4 + 16]   // width
-    sub        edx, eax
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]      // read 16 U's
-    movdqu     xmm1, [eax + edx]  // and 16 V's
-    lea        eax,  [eax + 16]
-    movdqa     xmm2, xmm0
-    punpcklbw  xmm0, xmm1       // first 8 UV pairs
-    punpckhbw  xmm2, xmm1       // next 8 UV pairs
-    movdqu     [edi], xmm0
-    movdqu     [edi + 16], xmm2
-    lea        edi, [edi + 32]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-#endif  //  HAS_MERGEUVROW_SSE2
-
-#ifdef HAS_MERGEUVROW_AVX2
-__declspec(naked) __declspec(align(16))
-void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                     int width) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_u
-    mov        edx, [esp + 4 + 8]    // src_v
-    mov        edi, [esp + 4 + 12]   // dst_uv
-    mov        ecx, [esp + 4 + 16]   // width
-    sub        edx, eax
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]           // read 32 U's
-    vmovdqu    ymm1, [eax + edx]     // and 32 V's
-    lea        eax,  [eax + 32]
-    vpunpcklbw ymm2, ymm0, ymm1      // low 16 UV pairs. mutated qqword 0,2
-    vpunpckhbw ymm0, ymm0, ymm1      // high 16 UV pairs. mutated qqword 1,3
-    vperm2i128 ymm1, ymm2, ymm0, 0x20  // low 128 of ymm2 and low 128 of ymm0
-    vperm2i128 ymm2, ymm2, ymm0, 0x31  // high 128 of ymm2 and high 128 of ymm0
-    vmovdqu    [edi], ymm1
-    vmovdqu    [edi + 32], ymm2
-    lea        edi, [edi + 64]
-    sub        ecx, 32
-    jg         convertloop
-
-    pop        edi
-    vzeroupper
-    ret
-  }
-}
-#endif  //  HAS_MERGEUVROW_AVX2
-
-#ifdef HAS_COPYROW_SSE2
-// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
-__declspec(naked) __declspec(align(16))
-void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
-  __asm {
-    mov        eax, [esp + 4]   // src
-    mov        edx, [esp + 8]   // dst
-    mov        ecx, [esp + 12]  // count
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax, [eax + 32]
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    sub        ecx, 32
-    jg         convertloop
-    ret
-  }
-}
-#endif  // HAS_COPYROW_SSE2
-
-// Unaligned Multiple of 1.
-__declspec(naked) __declspec(align(16))
-void CopyRow_ERMS(const uint8* src, uint8* dst, int count) {
-  __asm {
-    mov        eax, esi
-    mov        edx, edi
-    mov        esi, [esp + 4]   // src
-    mov        edi, [esp + 8]   // dst
-    mov        ecx, [esp + 12]  // count
-    rep movsb
-    mov        edi, edx
-    mov        esi, eax
-    ret
-  }
-}
-
-#ifdef HAS_COPYROW_X86
-__declspec(naked) __declspec(align(16))
-void CopyRow_X86(const uint8* src, uint8* dst, int count) {
-  __asm {
-    mov        eax, esi
-    mov        edx, edi
-    mov        esi, [esp + 4]   // src
-    mov        edi, [esp + 8]   // dst
-    mov        ecx, [esp + 12]  // count
-    shr        ecx, 2
-    rep movsd
-    mov        edi, edx
-    mov        esi, eax
-    ret
-  }
-}
-#endif  // HAS_COPYROW_X86
-
-#ifdef HAS_ARGBCOPYALPHAROW_SSE2
-// width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src
-    mov        edx, [esp + 8]   // dst
-    mov        ecx, [esp + 12]  // count
-    pcmpeqb    xmm0, xmm0       // generate mask 0xff000000
-    pslld      xmm0, 24
-    pcmpeqb    xmm1, xmm1       // generate mask 0x00ffffff
-    psrld      xmm1, 8
-
-    align      4
-  convertloop:
-    movdqa     xmm2, [eax]
-    movdqa     xmm3, [eax + 16]
-    lea        eax, [eax + 32]
-    movdqa     xmm4, [edx]
-    movdqa     xmm5, [edx + 16]
-    pand       xmm2, xmm0
-    pand       xmm3, xmm0
-    pand       xmm4, xmm1
-    pand       xmm5, xmm1
-    por        xmm2, xmm4
-    por        xmm3, xmm5
-    movdqa     [edx], xmm2
-    movdqa     [edx + 16], xmm3
-    lea        edx, [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    ret
-  }
-}
-#endif  // HAS_ARGBCOPYALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYALPHAROW_AVX2
-// width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src
-    mov        edx, [esp + 8]   // dst
-    mov        ecx, [esp + 12]  // count
-    vpcmpeqb   ymm0, ymm0, ymm0
-    vpsrld     ymm0, ymm0, 8    // generate mask 0x00ffffff
-
-    align      4
-  convertloop:
-    vmovdqu    ymm1, [eax]
-    vmovdqu    ymm2, [eax + 32]
-    lea        eax, [eax + 64]
-    vpblendvb  ymm1, ymm1, [edx], ymm0
-    vpblendvb  ymm2, ymm2, [edx + 32], ymm0
-    vmovdqu    [edx], ymm1
-    vmovdqu    [edx + 32], ymm2
-    lea        edx, [edx + 64]
-    sub        ecx, 16
-    jg         convertloop
-
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBCOPYALPHAROW_AVX2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-// width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src
-    mov        edx, [esp + 8]   // dst
-    mov        ecx, [esp + 12]  // count
-    pcmpeqb    xmm0, xmm0       // generate mask 0xff000000
-    pslld      xmm0, 24
-    pcmpeqb    xmm1, xmm1       // generate mask 0x00ffffff
-    psrld      xmm1, 8
-
-    align      4
-  convertloop:
-    movq       xmm2, qword ptr [eax]  // 8 Y's
-    lea        eax, [eax + 8]
-    punpcklbw  xmm2, xmm2
-    punpckhwd  xmm3, xmm2
-    punpcklwd  xmm2, xmm2
-    movdqa     xmm4, [edx]
-    movdqa     xmm5, [edx + 16]
-    pand       xmm2, xmm0
-    pand       xmm3, xmm0
-    pand       xmm4, xmm1
-    pand       xmm5, xmm1
-    por        xmm2, xmm4
-    por        xmm3, xmm5
-    movdqa     [edx], xmm2
-    movdqa     [edx + 16], xmm3
-    lea        edx, [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    ret
-  }
-}
-#endif  // HAS_ARGBCOPYYTOALPHAROW_SSE2
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
-// width in pixels
-__declspec(naked) __declspec(align(16))
-void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src
-    mov        edx, [esp + 8]   // dst
-    mov        ecx, [esp + 12]  // count
-    vpcmpeqb   ymm0, ymm0, ymm0
-    vpsrld     ymm0, ymm0, 8    // generate mask 0x00ffffff
-
-    align      4
-  convertloop:
-    vpmovzxbd  ymm1, qword ptr [eax]
-    vpmovzxbd  ymm2, qword ptr [eax + 8]
-    lea        eax, [eax + 16]
-    vpslld     ymm1, ymm1, 24
-    vpslld     ymm2, ymm2, 24
-    vpblendvb  ymm1, ymm1, [edx], ymm0
-    vpblendvb  ymm2, ymm2, [edx + 32], ymm0
-    vmovdqu    [edx], ymm1
-    vmovdqu    [edx + 32], ymm2
-    lea        edx, [edx + 64]
-    sub        ecx, 16
-    jg         convertloop
-
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBCOPYYTOALPHAROW_AVX2
-
-#ifdef HAS_SETROW_X86
-// SetRow8 writes 'count' bytes using a 32 bit value repeated.
-__declspec(naked) __declspec(align(16))
-void SetRow_X86(uint8* dst, uint32 v32, int count) {
-  __asm {
-    mov        edx, edi
-    mov        edi, [esp + 4]   // dst
-    mov        eax, [esp + 8]   // v32
-    mov        ecx, [esp + 12]  // count
-    shr        ecx, 2
-    rep stosd
-    mov        edi, edx
-    ret
-  }
-}
-
-// SetRow32 writes 'count' words using a 32 bit value repeated.
-__declspec(naked) __declspec(align(16))
-void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
-                   int dst_stride, int height) {
-  __asm {
-    push       esi
-    push       edi
-    push       ebp
-    mov        edi, [esp + 12 + 4]   // dst
-    mov        eax, [esp + 12 + 8]   // v32
-    mov        ebp, [esp + 12 + 12]  // width
-    mov        edx, [esp + 12 + 16]  // dst_stride
-    mov        esi, [esp + 12 + 20]  // height
-    lea        ecx, [ebp * 4]
-    sub        edx, ecx             // stride - width * 4
-
-    align      4
-  convertloop:
-    mov        ecx, ebp
-    rep stosd
-    add        edi, edx
-    sub        esi, 1
-    jg         convertloop
-
-    pop        ebp
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_SETROW_X86
-
-#ifdef HAS_YUY2TOYROW_AVX2
-__declspec(naked) __declspec(align(16))
-void YUY2ToYRow_AVX2(const uint8* src_yuy2,
-                     uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_yuy2
-    mov        edx, [esp + 8]    // dst_y
-    mov        ecx, [esp + 12]   // pix
-    vpcmpeqb   ymm5, ymm5, ymm5  // generate mask 0x00ff00ff
-    vpsrlw     ymm5, ymm5, 8
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    lea        eax,  [eax + 64]
-    vpand      ymm0, ymm0, ymm5   // even bytes are Y
-    vpand      ymm1, ymm1, ymm5
-    vpackuswb  ymm0, ymm0, ymm1   // mutates.
-    vpermq     ymm0, ymm0, 0xd8
-    sub        ecx, 32
-    vmovdqu    [edx], ymm0
-    lea        edx, [edx + 32]
-    jg         convertloop
-    vzeroupper
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_yuy2
-    mov        esi, [esp + 8 + 8]    // stride_yuy2
-    mov        edx, [esp + 8 + 12]   // dst_u
-    mov        edi, [esp + 8 + 16]   // dst_v
-    mov        ecx, [esp + 8 + 20]   // pix
-    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
-    vpsrlw     ymm5, ymm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    vpavgb     ymm0, ymm0, [eax + esi]
-    vpavgb     ymm1, ymm1, [eax + esi + 32]
-    lea        eax,  [eax + 64]
-    vpsrlw     ymm0, ymm0, 8      // YUYV -> UVUV
-    vpsrlw     ymm1, ymm1, 8
-    vpackuswb  ymm0, ymm0, ymm1   // mutates.
-    vpermq     ymm0, ymm0, 0xd8
-    vpand      ymm1, ymm0, ymm5  // U
-    vpsrlw     ymm0, ymm0, 8     // V
-    vpackuswb  ymm1, ymm1, ymm1  // mutates.
-    vpackuswb  ymm0, ymm0, ymm0  // mutates.
-    vpermq     ymm1, ymm1, 0xd8
-    vpermq     ymm0, ymm0, 0xd8
-    vextractf128 [edx], ymm1, 0  // U
-    vextractf128 [edx + edi], ymm0, 0 // V
-    lea        edx, [edx + 16]
-    sub        ecx, 32
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_yuy2
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
-    vpsrlw     ymm5, ymm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    lea        eax,  [eax + 64]
-    vpsrlw     ymm0, ymm0, 8      // YUYV -> UVUV
-    vpsrlw     ymm1, ymm1, 8
-    vpackuswb  ymm0, ymm0, ymm1   // mutates.
-    vpermq     ymm0, ymm0, 0xd8
-    vpand      ymm1, ymm0, ymm5  // U
-    vpsrlw     ymm0, ymm0, 8     // V
-    vpackuswb  ymm1, ymm1, ymm1  // mutates.
-    vpackuswb  ymm0, ymm0, ymm0  // mutates.
-    vpermq     ymm1, ymm1, 0xd8
-    vpermq     ymm0, ymm0, 0xd8
-    vextractf128 [edx], ymm1, 0  // U
-    vextractf128 [edx + edi], ymm0, 0 // V
-    lea        edx, [edx + 16]
-    sub        ecx, 32
-    jg         convertloop
-
-    pop        edi
-    vzeroupper
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToYRow_AVX2(const uint8* src_uyvy,
-                     uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_uyvy
-    mov        edx, [esp + 8]    // dst_y
-    mov        ecx, [esp + 12]   // pix
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    lea        eax,  [eax + 64]
-    vpsrlw     ymm0, ymm0, 8      // odd bytes are Y
-    vpsrlw     ymm1, ymm1, 8
-    vpackuswb  ymm0, ymm0, ymm1   // mutates.
-    vpermq     ymm0, ymm0, 0xd8
-    sub        ecx, 32
-    vmovdqu    [edx], ymm0
-    lea        edx, [edx + 32]
-    jg         convertloop
-    ret
-    vzeroupper
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_yuy2
-    mov        esi, [esp + 8 + 8]    // stride_yuy2
-    mov        edx, [esp + 8 + 12]   // dst_u
-    mov        edi, [esp + 8 + 16]   // dst_v
-    mov        ecx, [esp + 8 + 20]   // pix
-    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
-    vpsrlw     ymm5, ymm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    vpavgb     ymm0, ymm0, [eax + esi]
-    vpavgb     ymm1, ymm1, [eax + esi + 32]
-    lea        eax,  [eax + 64]
-    vpand      ymm0, ymm0, ymm5   // UYVY -> UVUV
-    vpand      ymm1, ymm1, ymm5
-    vpackuswb  ymm0, ymm0, ymm1   // mutates.
-    vpermq     ymm0, ymm0, 0xd8
-    vpand      ymm1, ymm0, ymm5  // U
-    vpsrlw     ymm0, ymm0, 8     // V
-    vpackuswb  ymm1, ymm1, ymm1  // mutates.
-    vpackuswb  ymm0, ymm0, ymm0  // mutates.
-    vpermq     ymm1, ymm1, 0xd8
-    vpermq     ymm0, ymm0, 0xd8
-    vextractf128 [edx], ymm1, 0  // U
-    vextractf128 [edx + edi], ymm0, 0 // V
-    lea        edx, [edx + 16]
-    sub        ecx, 32
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_yuy2
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    vpcmpeqb   ymm5, ymm5, ymm5      // generate mask 0x00ff00ff
-    vpsrlw     ymm5, ymm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    lea        eax,  [eax + 64]
-    vpand      ymm0, ymm0, ymm5   // UYVY -> UVUV
-    vpand      ymm1, ymm1, ymm5
-    vpackuswb  ymm0, ymm0, ymm1   // mutates.
-    vpermq     ymm0, ymm0, 0xd8
-    vpand      ymm1, ymm0, ymm5  // U
-    vpsrlw     ymm0, ymm0, 8     // V
-    vpackuswb  ymm1, ymm1, ymm1  // mutates.
-    vpackuswb  ymm0, ymm0, ymm0  // mutates.
-    vpermq     ymm1, ymm1, 0xd8
-    vpermq     ymm0, ymm0, 0xd8
-    vextractf128 [edx], ymm1, 0  // U
-    vextractf128 [edx + edi], ymm0, 0 // V
-    lea        edx, [edx + 16]
-    sub        ecx, 32
-    jg         convertloop
-
-    pop        edi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_YUY2TOYROW_AVX2
-
-#ifdef HAS_YUY2TOYROW_SSE2
-__declspec(naked) __declspec(align(16))
-void YUY2ToYRow_SSE2(const uint8* src_yuy2,
-                     uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_yuy2
-    mov        edx, [esp + 8]    // dst_y
-    mov        ecx, [esp + 12]   // pix
-    pcmpeqb    xmm5, xmm5        // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    pand       xmm0, xmm5   // even bytes are Y
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_yuy2
-    mov        esi, [esp + 8 + 8]    // stride_yuy2
-    mov        edx, [esp + 8 + 12]   // dst_u
-    mov        edi, [esp + 8 + 16]   // dst_v
-    mov        ecx, [esp + 8 + 20]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + esi]
-    movdqa     xmm3, [eax + esi + 16]
-    lea        eax,  [eax + 32]
-    pavgb      xmm0, xmm2
-    pavgb      xmm1, xmm3
-    psrlw      xmm0, 8      // YUYV -> UVUV
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
-                         uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_yuy2
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    psrlw      xmm0, 8      // YUYV -> UVUV
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
-                               uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_yuy2
-    mov        edx, [esp + 8]    // dst_y
-    mov        ecx, [esp + 12]   // pix
-    pcmpeqb    xmm5, xmm5        // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    pand       xmm0, xmm5   // even bytes are Y
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
-                                uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_yuy2
-    mov        esi, [esp + 8 + 8]    // stride_yuy2
-    mov        edx, [esp + 8 + 12]   // dst_u
-    mov        edi, [esp + 8 + 16]   // dst_v
-    mov        ecx, [esp + 8 + 20]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + esi]
-    movdqu     xmm3, [eax + esi + 16]
-    lea        eax,  [eax + 32]
-    pavgb      xmm0, xmm2
-    pavgb      xmm1, xmm3
-    psrlw      xmm0, 8      // YUYV -> UVUV
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
-                                   uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_yuy2
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    psrlw      xmm0, 8      // YUYV -> UVUV
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToYRow_SSE2(const uint8* src_uyvy,
-                     uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_uyvy
-    mov        edx, [esp + 8]    // dst_y
-    mov        ecx, [esp + 12]   // pix
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    psrlw      xmm0, 8    // odd bytes are Y
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
-                      uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_yuy2
-    mov        esi, [esp + 8 + 8]    // stride_yuy2
-    mov        edx, [esp + 8 + 12]   // dst_u
-    mov        edi, [esp + 8 + 16]   // dst_v
-    mov        ecx, [esp + 8 + 20]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + esi]
-    movdqa     xmm3, [eax + esi + 16]
-    lea        eax,  [eax + 32]
-    pavgb      xmm0, xmm2
-    pavgb      xmm1, xmm3
-    pand       xmm0, xmm5   // UYVY -> UVUV
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
-                         uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_yuy2
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    pand       xmm0, xmm5   // UYVY -> UVUV
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
-                               uint8* dst_y, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_uyvy
-    mov        edx, [esp + 8]    // dst_y
-    mov        ecx, [esp + 12]   // pix
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    psrlw      xmm0, 8    // odd bytes are Y
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
-                                uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_yuy2
-    mov        esi, [esp + 8 + 8]    // stride_yuy2
-    mov        edx, [esp + 8 + 12]   // dst_u
-    mov        edi, [esp + 8 + 16]   // dst_v
-    mov        ecx, [esp + 8 + 20]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + esi]
-    movdqu     xmm3, [eax + esi + 16]
-    lea        eax,  [eax + 32]
-    pavgb      xmm0, xmm2
-    pavgb      xmm1, xmm3
-    pand       xmm0, xmm5   // UYVY -> UVUV
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
-                                   uint8* dst_u, uint8* dst_v, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_yuy2
-    mov        edx, [esp + 4 + 8]    // dst_u
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-    sub        edi, edx
-
-    align      4
-  convertloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    pand       xmm0, xmm5   // UYVY -> UVUV
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    movdqa     xmm1, xmm0
-    pand       xmm0, xmm5  // U
-    packuswb   xmm0, xmm0
-    psrlw      xmm1, 8     // V
-    packuswb   xmm1, xmm1
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + edi], xmm1
-    lea        edx, [edx + 8]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-#endif  // HAS_YUY2TOYROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSE2
-// Blend 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                       uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pcmpeqb    xmm7, xmm7       // generate constant 1
-    psrlw      xmm7, 15
-    pcmpeqb    xmm6, xmm6       // generate mask 0x00ff00ff
-    psrlw      xmm6, 8
-    pcmpeqb    xmm5, xmm5       // generate mask 0xff00ff00
-    psllw      xmm5, 8
-    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
-    pslld      xmm4, 24
-
-    sub        ecx, 1
-    je         convertloop1     // only 1 pixel?
-    jl         convertloop1b
-
-    // 1 pixel loop until destination pointer is aligned.
-  alignloop1:
-    test       edx, 15          // aligned?
-    je         alignloop1b
-    movd       xmm3, [eax]
-    lea        eax, [eax + 4]
-    movdqa     xmm0, xmm3       // src argb
-    pxor       xmm3, xmm4       // ~alpha
-    movd       xmm2, [esi]      // _r_b
-    psrlw      xmm3, 8          // alpha
-    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
-    pshuflw    xmm3, xmm3, 0F5h
-    pand       xmm2, xmm6       // _r_b
-    paddw      xmm3, xmm7       // 256 - alpha
-    pmullw     xmm2, xmm3       // _r_b * alpha
-    movd       xmm1, [esi]      // _a_g
-    lea        esi, [esi + 4]
-    psrlw      xmm1, 8          // _a_g
-    por        xmm0, xmm4       // set alpha to 255
-    pmullw     xmm1, xmm3       // _a_g * alpha
-    psrlw      xmm2, 8          // _r_b convert to 8 bits again
-    paddusb    xmm0, xmm2       // + src argb
-    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
-    paddusb    xmm0, xmm1       // + src argb
-    sub        ecx, 1
-    movd       [edx], xmm0
-    lea        edx, [edx + 4]
-    jge        alignloop1
-
-  alignloop1b:
-    add        ecx, 1 - 4
-    jl         convertloop4b
-
-    // 4 pixel loop.
-  convertloop4:
-    movdqu     xmm3, [eax]      // src argb
-    lea        eax, [eax + 16]
-    movdqa     xmm0, xmm3       // src argb
-    pxor       xmm3, xmm4       // ~alpha
-    movdqu     xmm2, [esi]      // _r_b
-    psrlw      xmm3, 8          // alpha
-    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
-    pshuflw    xmm3, xmm3, 0F5h
-    pand       xmm2, xmm6       // _r_b
-    paddw      xmm3, xmm7       // 256 - alpha
-    pmullw     xmm2, xmm3       // _r_b * alpha
-    movdqu     xmm1, [esi]      // _a_g
-    lea        esi, [esi + 16]
-    psrlw      xmm1, 8          // _a_g
-    por        xmm0, xmm4       // set alpha to 255
-    pmullw     xmm1, xmm3       // _a_g * alpha
-    psrlw      xmm2, 8          // _r_b convert to 8 bits again
-    paddusb    xmm0, xmm2       // + src argb
-    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
-    paddusb    xmm0, xmm1       // + src argb
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jge        convertloop4
-
-  convertloop4b:
-    add        ecx, 4 - 1
-    jl         convertloop1b
-
-    // 1 pixel loop.
-  convertloop1:
-    movd       xmm3, [eax]      // src argb
-    lea        eax, [eax + 4]
-    movdqa     xmm0, xmm3       // src argb
-    pxor       xmm3, xmm4       // ~alpha
-    movd       xmm2, [esi]      // _r_b
-    psrlw      xmm3, 8          // alpha
-    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
-    pshuflw    xmm3, xmm3, 0F5h
-    pand       xmm2, xmm6       // _r_b
-    paddw      xmm3, xmm7       // 256 - alpha
-    pmullw     xmm2, xmm3       // _r_b * alpha
-    movd       xmm1, [esi]      // _a_g
-    lea        esi, [esi + 4]
-    psrlw      xmm1, 8          // _a_g
-    por        xmm0, xmm4       // set alpha to 255
-    pmullw     xmm1, xmm3       // _a_g * alpha
-    psrlw      xmm2, 8          // _r_b convert to 8 bits again
-    paddusb    xmm0, xmm2       // + src argb
-    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
-    paddusb    xmm0, xmm1       // + src argb
-    sub        ecx, 1
-    movd       [edx], xmm0
-    lea        edx, [edx + 4]
-    jge        convertloop1
-
-  convertloop1b:
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBBLENDROW_SSE2
-
-#ifdef HAS_ARGBBLENDROW_SSSE3
-// Shuffle table for isolating alpha.
-static const uvec8 kShuffleAlpha = {
-  3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
-  11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
-};
-// Same as SSE2, but replaces:
-//    psrlw      xmm3, 8          // alpha
-//    pshufhw    xmm3, xmm3, 0F5h // 8 alpha words
-//    pshuflw    xmm3, xmm3, 0F5h
-// with..
-//    pshufb     xmm3, kShuffleAlpha // alpha
-// Blend 8 pixels at a time.
-
-__declspec(naked) __declspec(align(16))
-void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
-                        uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pcmpeqb    xmm7, xmm7       // generate constant 0x0001
-    psrlw      xmm7, 15
-    pcmpeqb    xmm6, xmm6       // generate mask 0x00ff00ff
-    psrlw      xmm6, 8
-    pcmpeqb    xmm5, xmm5       // generate mask 0xff00ff00
-    psllw      xmm5, 8
-    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
-    pslld      xmm4, 24
-
-    sub        ecx, 1
-    je         convertloop1     // only 1 pixel?
-    jl         convertloop1b
-
-    // 1 pixel loop until destination pointer is aligned.
-  alignloop1:
-    test       edx, 15          // aligned?
-    je         alignloop1b
-    movd       xmm3, [eax]
-    lea        eax, [eax + 4]
-    movdqa     xmm0, xmm3       // src argb
-    pxor       xmm3, xmm4       // ~alpha
-    movd       xmm2, [esi]      // _r_b
-    pshufb     xmm3, kShuffleAlpha // alpha
-    pand       xmm2, xmm6       // _r_b
-    paddw      xmm3, xmm7       // 256 - alpha
-    pmullw     xmm2, xmm3       // _r_b * alpha
-    movd       xmm1, [esi]      // _a_g
-    lea        esi, [esi + 4]
-    psrlw      xmm1, 8          // _a_g
-    por        xmm0, xmm4       // set alpha to 255
-    pmullw     xmm1, xmm3       // _a_g * alpha
-    psrlw      xmm2, 8          // _r_b convert to 8 bits again
-    paddusb    xmm0, xmm2       // + src argb
-    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
-    paddusb    xmm0, xmm1       // + src argb
-    sub        ecx, 1
-    movd       [edx], xmm0
-    lea        edx, [edx + 4]
-    jge        alignloop1
-
-  alignloop1b:
-    add        ecx, 1 - 4
-    jl         convertloop4b
-
-    test       eax, 15          // unaligned?
-    jne        convertuloop4
-    test       esi, 15          // unaligned?
-    jne        convertuloop4
-
-    // 4 pixel loop.
-  convertloop4:
-    movdqa     xmm3, [eax]      // src argb
-    lea        eax, [eax + 16]
-    movdqa     xmm0, xmm3       // src argb
-    pxor       xmm3, xmm4       // ~alpha
-    movdqa     xmm2, [esi]      // _r_b
-    pshufb     xmm3, kShuffleAlpha // alpha
-    pand       xmm2, xmm6       // _r_b
-    paddw      xmm3, xmm7       // 256 - alpha
-    pmullw     xmm2, xmm3       // _r_b * alpha
-    movdqa     xmm1, [esi]      // _a_g
-    lea        esi, [esi + 16]
-    psrlw      xmm1, 8          // _a_g
-    por        xmm0, xmm4       // set alpha to 255
-    pmullw     xmm1, xmm3       // _a_g * alpha
-    psrlw      xmm2, 8          // _r_b convert to 8 bits again
-    paddusb    xmm0, xmm2       // + src argb
-    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
-    paddusb    xmm0, xmm1       // + src argb
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jge        convertloop4
-    jmp        convertloop4b
-
-    // 4 pixel unaligned loop.
-  convertuloop4:
-    movdqu     xmm3, [eax]      // src argb
-    lea        eax, [eax + 16]
-    movdqa     xmm0, xmm3       // src argb
-    pxor       xmm3, xmm4       // ~alpha
-    movdqu     xmm2, [esi]      // _r_b
-    pshufb     xmm3, kShuffleAlpha // alpha
-    pand       xmm2, xmm6       // _r_b
-    paddw      xmm3, xmm7       // 256 - alpha
-    pmullw     xmm2, xmm3       // _r_b * alpha
-    movdqu     xmm1, [esi]      // _a_g
-    lea        esi, [esi + 16]
-    psrlw      xmm1, 8          // _a_g
-    por        xmm0, xmm4       // set alpha to 255
-    pmullw     xmm1, xmm3       // _a_g * alpha
-    psrlw      xmm2, 8          // _r_b convert to 8 bits again
-    paddusb    xmm0, xmm2       // + src argb
-    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
-    paddusb    xmm0, xmm1       // + src argb
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jge        convertuloop4
-
-  convertloop4b:
-    add        ecx, 4 - 1
-    jl         convertloop1b
-
-    // 1 pixel loop.
-  convertloop1:
-    movd       xmm3, [eax]      // src argb
-    lea        eax, [eax + 4]
-    movdqa     xmm0, xmm3       // src argb
-    pxor       xmm3, xmm4       // ~alpha
-    movd       xmm2, [esi]      // _r_b
-    pshufb     xmm3, kShuffleAlpha // alpha
-    pand       xmm2, xmm6       // _r_b
-    paddw      xmm3, xmm7       // 256 - alpha
-    pmullw     xmm2, xmm3       // _r_b * alpha
-    movd       xmm1, [esi]      // _a_g
-    lea        esi, [esi + 4]
-    psrlw      xmm1, 8          // _a_g
-    por        xmm0, xmm4       // set alpha to 255
-    pmullw     xmm1, xmm3       // _a_g * alpha
-    psrlw      xmm2, 8          // _r_b convert to 8 bits again
-    paddusb    xmm0, xmm2       // + src argb
-    pand       xmm1, xmm5       // a_g_ convert to 8 bits again
-    paddusb    xmm0, xmm1       // + src argb
-    sub        ecx, 1
-    movd       [edx], xmm0
-    lea        edx, [edx + 4]
-    jge        convertloop1
-
-  convertloop1b:
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBBLENDROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_SSE2
-// Attenuate 4 pixels at a time.
-// Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src_argb0
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    pcmpeqb    xmm4, xmm4       // generate mask 0xff000000
-    pslld      xmm4, 24
-    pcmpeqb    xmm5, xmm5       // generate mask 0x00ffffff
-    psrld      xmm5, 8
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]      // read 4 pixels
-    punpcklbw  xmm0, xmm0       // first 2
-    pshufhw    xmm2, xmm0, 0FFh // 8 alpha words
-    pshuflw    xmm2, xmm2, 0FFh
-    pmulhuw    xmm0, xmm2       // rgb * a
-    movdqa     xmm1, [eax]      // read 4 pixels
-    punpckhbw  xmm1, xmm1       // next 2 pixels
-    pshufhw    xmm2, xmm1, 0FFh // 8 alpha words
-    pshuflw    xmm2, xmm2, 0FFh
-    pmulhuw    xmm1, xmm2       // rgb * a
-    movdqa     xmm2, [eax]      // alphas
-    lea        eax, [eax + 16]
-    psrlw      xmm0, 8
-    pand       xmm2, xmm4
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    pand       xmm0, xmm5       // keep original alphas
-    por        xmm0, xmm2
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    ret
-  }
-}
-#endif  // HAS_ARGBATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBATTENUATEROW_SSSE3
-// Shuffle table duplicating alpha.
-static const uvec8 kShuffleAlpha0 = {
-  3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
-};
-static const uvec8 kShuffleAlpha1 = {
-  11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
-  15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
-};
-__declspec(naked) __declspec(align(16))
-void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src_argb0
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    pcmpeqb    xmm3, xmm3       // generate mask 0xff000000
-    pslld      xmm3, 24
-    movdqa     xmm4, kShuffleAlpha0
-    movdqa     xmm5, kShuffleAlpha1
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]      // read 4 pixels
-    pshufb     xmm0, xmm4       // isolate first 2 alphas
-    movdqu     xmm1, [eax]      // read 4 pixels
-    punpcklbw  xmm1, xmm1       // first 2 pixel rgbs
-    pmulhuw    xmm0, xmm1       // rgb * a
-    movdqu     xmm1, [eax]      // read 4 pixels
-    pshufb     xmm1, xmm5       // isolate next 2 alphas
-    movdqu     xmm2, [eax]      // read 4 pixels
-    punpckhbw  xmm2, xmm2       // next 2 pixel rgbs
-    pmulhuw    xmm1, xmm2       // rgb * a
-    movdqu     xmm2, [eax]      // mask original alpha
-    lea        eax, [eax + 16]
-    pand       xmm2, xmm3
-    psrlw      xmm0, 8
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    por        xmm0, xmm2       // copy original alpha
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    ret
-  }
-}
-#endif  // HAS_ARGBATTENUATEROW_SSSE3
-
-#ifdef HAS_ARGBATTENUATEROW_AVX2
-// Shuffle table duplicating alpha.
-static const ulvec8 kShuffleAlpha_AVX2 = {
-  6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
-  14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
-  6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
-  14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
-};
-__declspec(naked) __declspec(align(16))
-void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src_argb0
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    sub        edx, eax
-    vmovdqa    ymm4, kShuffleAlpha_AVX2
-    vpcmpeqb   ymm5, ymm5, ymm5 // generate mask 0xff000000
-    vpslld     ymm5, ymm5, 24
-
-    align      4
- convertloop:
-    vmovdqu    ymm6, [eax]       // read 8 pixels.
-    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
-    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
-    vpshufb    ymm2, ymm0, ymm4  // low 4 alphas
-    vpshufb    ymm3, ymm1, ymm4  // high 4 alphas
-    vpmulhuw   ymm0, ymm0, ymm2  // rgb * a
-    vpmulhuw   ymm1, ymm1, ymm3  // rgb * a
-    vpand      ymm6, ymm6, ymm5  // isolate alpha
-    vpsrlw     ymm0, ymm0, 8
-    vpsrlw     ymm1, ymm1, 8
-    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
-    vpor       ymm0, ymm0, ymm6  // copy original alpha
-    sub        ecx, 8
-    vmovdqu    [eax + edx], ymm0
-    lea        eax, [eax + 32]
-    jg         convertloop
-
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBUNATTENUATEROW_SSE2
-// Unattenuate 4 pixels at a time.
-// Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
-void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
-                             int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_argb0
-    mov        edx, [esp + 8 + 8]   // dst_argb
-    mov        ecx, [esp + 8 + 12]  // width
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]      // read 4 pixels
-    movzx      esi, byte ptr [eax + 3]  // first alpha
-    movzx      edi, byte ptr [eax + 7]  // second alpha
-    punpcklbw  xmm0, xmm0       // first 2
-    movd       xmm2, dword ptr fixed_invtbl8[esi * 4]
-    movd       xmm3, dword ptr fixed_invtbl8[edi * 4]
-    pshuflw    xmm2, xmm2, 040h // first 4 inv_alpha words.  1, a, a, a
-    pshuflw    xmm3, xmm3, 040h // next 4 inv_alpha words
-    movlhps    xmm2, xmm3
-    pmulhuw    xmm0, xmm2       // rgb * a
-
-    movdqu     xmm1, [eax]      // read 4 pixels
-    movzx      esi, byte ptr [eax + 11]  // third alpha
-    movzx      edi, byte ptr [eax + 15]  // forth alpha
-    punpckhbw  xmm1, xmm1       // next 2
-    movd       xmm2, dword ptr fixed_invtbl8[esi * 4]
-    movd       xmm3, dword ptr fixed_invtbl8[edi * 4]
-    pshuflw    xmm2, xmm2, 040h // first 4 inv_alpha words
-    pshuflw    xmm3, xmm3, 040h // next 4 inv_alpha words
-    movlhps    xmm2, xmm3
-    pmulhuw    xmm1, xmm2       // rgb * a
-    lea        eax, [eax + 16]
-
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBUNATTENUATEROW_SSE2
-
-#ifdef HAS_ARGBUNATTENUATEROW_AVX2
-// Shuffle table duplicating alpha.
-static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
-  0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
-  0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
-};
-// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
-// USE_GATHER is not on by default, due to being a slow instruction.
-#ifdef USE_GATHER
-__declspec(naked) __declspec(align(16))
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
-                             int width) {
-  __asm {
-    mov        eax, [esp + 4]   // src_argb0
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    sub        edx, eax
-    vmovdqa    ymm4, kUnattenShuffleAlpha_AVX2
-
-    align      4
- convertloop:
-    vmovdqu    ymm6, [eax]       // read 8 pixels.
-    vpcmpeqb   ymm5, ymm5, ymm5  // generate mask 0xffffffff for gather.
-    vpsrld     ymm2, ymm6, 24    // alpha in low 8 bits.
-    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
-    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
-    vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5  // ymm5 cleared.  1, a
-    vpunpcklwd ymm2, ymm3, ymm3  // low 4 inverted alphas. mutated. 1, 1, a, a
-    vpunpckhwd ymm3, ymm3, ymm3  // high 4 inverted alphas. mutated.
-    vpshufb    ymm2, ymm2, ymm4  // replicate low 4 alphas. 1, a, a, a
-    vpshufb    ymm3, ymm3, ymm4  // replicate high 4 alphas
-    vpmulhuw   ymm0, ymm0, ymm2  // rgb * ia
-    vpmulhuw   ymm1, ymm1, ymm3  // rgb * ia
-    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
-    sub        ecx, 8
-    vmovdqu    [eax + edx], ymm0
-    lea        eax, [eax + 32]
-    jg         convertloop
-
-    vzeroupper
-    ret
-  }
-}
-#else  // USE_GATHER
-__declspec(naked) __declspec(align(16))
-void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
-                             int width) {
-  __asm {
-
-    mov        eax, [esp + 4]   // src_argb0
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    sub        edx, eax
-    vmovdqa    ymm5, kUnattenShuffleAlpha_AVX2
-
-    push       esi
-    push       edi
-
-    align      4
- convertloop:
-    // replace VPGATHER
-    movzx      esi, byte ptr [eax + 3]                 // alpha0
-    movzx      edi, byte ptr [eax + 7]                 // alpha1
-    vmovd      xmm0, dword ptr fixed_invtbl8[esi * 4]  // [1,a0]
-    vmovd      xmm1, dword ptr fixed_invtbl8[edi * 4]  // [1,a1]
-    movzx      esi, byte ptr [eax + 11]                // alpha2
-    movzx      edi, byte ptr [eax + 15]                // alpha3
-    vpunpckldq xmm6, xmm0, xmm1                        // [1,a1,1,a0]
-    vmovd      xmm2, dword ptr fixed_invtbl8[esi * 4]  // [1,a2]
-    vmovd      xmm3, dword ptr fixed_invtbl8[edi * 4]  // [1,a3]
-    movzx      esi, byte ptr [eax + 19]                // alpha4
-    movzx      edi, byte ptr [eax + 23]                // alpha5
-    vpunpckldq xmm7, xmm2, xmm3                        // [1,a3,1,a2]
-    vmovd      xmm0, dword ptr fixed_invtbl8[esi * 4]  // [1,a4]
-    vmovd      xmm1, dword ptr fixed_invtbl8[edi * 4]  // [1,a5]
-    movzx      esi, byte ptr [eax + 27]                // alpha6
-    movzx      edi, byte ptr [eax + 31]                // alpha7
-    vpunpckldq xmm0, xmm0, xmm1                        // [1,a5,1,a4]
-    vmovd      xmm2, dword ptr fixed_invtbl8[esi * 4]  // [1,a6]
-    vmovd      xmm3, dword ptr fixed_invtbl8[edi * 4]  // [1,a7]
-    vpunpckldq xmm2, xmm2, xmm3                        // [1,a7,1,a6]
-    vpunpcklqdq xmm3, xmm6, xmm7                       // [1,a3,1,a2,1,a1,1,a0]
-    vpunpcklqdq xmm0, xmm0, xmm2                       // [1,a7,1,a6,1,a5,1,a4]
-    vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0]
-    // end of VPGATHER
-
-    vmovdqu    ymm6, [eax]       // read 8 pixels.
-    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
-    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
-    vpunpcklwd ymm2, ymm3, ymm3  // low 4 inverted alphas. mutated. 1, 1, a, a
-    vpunpckhwd ymm3, ymm3, ymm3  // high 4 inverted alphas. mutated.
-    vpshufb    ymm2, ymm2, ymm5  // replicate low 4 alphas. 1, a, a, a
-    vpshufb    ymm3, ymm3, ymm5  // replicate high 4 alphas
-    vpmulhuw   ymm0, ymm0, ymm2  // rgb * ia
-    vpmulhuw   ymm1, ymm1, ymm3  // rgb * ia
-    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
-    sub        ecx, 8
-    vmovdqu    [eax + edx], ymm0
-    lea        eax, [eax + 32]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // USE_GATHER
-#endif  // HAS_ARGBATTENUATEROW_AVX2
-
-#ifdef HAS_ARGBGRAYROW_SSSE3
-// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
-__declspec(naked) __declspec(align(16))
-void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_argb */
-    mov        ecx, [esp + 12]  /* width */
-    movdqa     xmm4, kARGBToYJ
-    movdqa     xmm5, kAddYJ64
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]  // G
-    movdqa     xmm1, [eax + 16]
-    pmaddubsw  xmm0, xmm4
-    pmaddubsw  xmm1, xmm4
-    phaddw     xmm0, xmm1
-    paddw      xmm0, xmm5  // Add .5 for rounding.
-    psrlw      xmm0, 7
-    packuswb   xmm0, xmm0   // 8 G bytes
-    movdqa     xmm2, [eax]  // A
-    movdqa     xmm3, [eax + 16]
-    lea        eax, [eax + 32]
-    psrld      xmm2, 24
-    psrld      xmm3, 24
-    packuswb   xmm2, xmm3
-    packuswb   xmm2, xmm2   // 8 A bytes
-    movdqa     xmm3, xmm0   // Weave into GG, GA, then GGGA
-    punpcklbw  xmm0, xmm0   // 8 GG words
-    punpcklbw  xmm3, xmm2   // 8 GA words
-    movdqa     xmm1, xmm0
-    punpcklwd  xmm0, xmm3   // GGGA first 4
-    punpckhwd  xmm1, xmm3   // GGGA next 4
-    sub        ecx, 8
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    jg         convertloop
-    ret
-  }
-}
-#endif  // HAS_ARGBGRAYROW_SSSE3
-
-#ifdef HAS_ARGBSEPIAROW_SSSE3
-//    b = (r * 35 + g * 68 + b * 17) >> 7
-//    g = (r * 45 + g * 88 + b * 22) >> 7
-//    r = (r * 50 + g * 98 + b * 24) >> 7
-// Constant for ARGB color to sepia tone.
-static const vec8 kARGBToSepiaB = {
-  17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
-};
-
-static const vec8 kARGBToSepiaG = {
-  22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
-};
-
-static const vec8 kARGBToSepiaR = {
-  24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
-};
-
-// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
-__declspec(naked) __declspec(align(16))
-void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
-  __asm {
-    mov        eax, [esp + 4]   /* dst_argb */
-    mov        ecx, [esp + 8]   /* width */
-    movdqa     xmm2, kARGBToSepiaB
-    movdqa     xmm3, kARGBToSepiaG
-    movdqa     xmm4, kARGBToSepiaR
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]  // B
-    movdqa     xmm6, [eax + 16]
-    pmaddubsw  xmm0, xmm2
-    pmaddubsw  xmm6, xmm2
-    phaddw     xmm0, xmm6
-    psrlw      xmm0, 7
-    packuswb   xmm0, xmm0   // 8 B values
-    movdqa     xmm5, [eax]  // G
-    movdqa     xmm1, [eax + 16]
-    pmaddubsw  xmm5, xmm3
-    pmaddubsw  xmm1, xmm3
-    phaddw     xmm5, xmm1
-    psrlw      xmm5, 7
-    packuswb   xmm5, xmm5   // 8 G values
-    punpcklbw  xmm0, xmm5   // 8 BG values
-    movdqa     xmm5, [eax]  // R
-    movdqa     xmm1, [eax + 16]
-    pmaddubsw  xmm5, xmm4
-    pmaddubsw  xmm1, xmm4
-    phaddw     xmm5, xmm1
-    psrlw      xmm5, 7
-    packuswb   xmm5, xmm5   // 8 R values
-    movdqa     xmm6, [eax]  // A
-    movdqa     xmm1, [eax + 16]
-    psrld      xmm6, 24
-    psrld      xmm1, 24
-    packuswb   xmm6, xmm1
-    packuswb   xmm6, xmm6   // 8 A values
-    punpcklbw  xmm5, xmm6   // 8 RA values
-    movdqa     xmm1, xmm0   // Weave BG, RA together
-    punpcklwd  xmm0, xmm5   // BGRA first 4
-    punpckhwd  xmm1, xmm5   // BGRA next 4
-    sub        ecx, 8
-    movdqa     [eax], xmm0
-    movdqa     [eax + 16], xmm1
-    lea        eax, [eax + 32]
-    jg         convertloop
-    ret
-  }
-}
-#endif  // HAS_ARGBSEPIAROW_SSSE3
-
-#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
-// Tranform 8 ARGB pixels (32 bytes) with color matrix.
-// Same as Sepia except matrix is provided.
-// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
-// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
-__declspec(naked) __declspec(align(16))
-void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                              const int8* matrix_argb, int width) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_argb */
-    mov        ecx, [esp + 12]  /* matrix_argb */
-    movdqu     xmm5, [ecx]
-    pshufd     xmm2, xmm5, 0x00
-    pshufd     xmm3, xmm5, 0x55
-    pshufd     xmm4, xmm5, 0xaa
-    pshufd     xmm5, xmm5, 0xff
-    mov        ecx, [esp + 16]  /* width */
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]  // B
-    movdqa     xmm7, [eax + 16]
-    pmaddubsw  xmm0, xmm2
-    pmaddubsw  xmm7, xmm2
-    movdqa     xmm6, [eax]  // G
-    movdqa     xmm1, [eax + 16]
-    pmaddubsw  xmm6, xmm3
-    pmaddubsw  xmm1, xmm3
-    phaddsw    xmm0, xmm7   // B
-    phaddsw    xmm6, xmm1   // G
-    psraw      xmm0, 6      // B
-    psraw      xmm6, 6      // G
-    packuswb   xmm0, xmm0   // 8 B values
-    packuswb   xmm6, xmm6   // 8 G values
-    punpcklbw  xmm0, xmm6   // 8 BG values
-    movdqa     xmm1, [eax]  // R
-    movdqa     xmm7, [eax + 16]
-    pmaddubsw  xmm1, xmm4
-    pmaddubsw  xmm7, xmm4
-    phaddsw    xmm1, xmm7   // R
-    movdqa     xmm6, [eax]  // A
-    movdqa     xmm7, [eax + 16]
-    pmaddubsw  xmm6, xmm5
-    pmaddubsw  xmm7, xmm5
-    phaddsw    xmm6, xmm7   // A
-    psraw      xmm1, 6      // R
-    psraw      xmm6, 6      // A
-    packuswb   xmm1, xmm1   // 8 R values
-    packuswb   xmm6, xmm6   // 8 A values
-    punpcklbw  xmm1, xmm6   // 8 RA values
-    movdqa     xmm6, xmm0   // Weave BG, RA together
-    punpcklwd  xmm0, xmm1   // BGRA first 4
-    punpckhwd  xmm6, xmm1   // BGRA next 4
-    sub        ecx, 8
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm6
-    lea        eax, [eax + 32]
-    lea        edx, [edx + 32]
-    jg         convertloop
-    ret
-  }
-}
-#endif  // HAS_ARGBCOLORMATRIXROW_SSSE3
-
-#ifdef HAS_ARGBQUANTIZEROW_SSE2
-// Quantize 4 ARGB pixels (16 bytes).
-// Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
-void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
-                          int interval_offset, int width) {
-  __asm {
-    mov        eax, [esp + 4]    /* dst_argb */
-    movd       xmm2, [esp + 8]   /* scale */
-    movd       xmm3, [esp + 12]  /* interval_size */
-    movd       xmm4, [esp + 16]  /* interval_offset */
-    mov        ecx, [esp + 20]   /* width */
-    pshuflw    xmm2, xmm2, 040h
-    pshufd     xmm2, xmm2, 044h
-    pshuflw    xmm3, xmm3, 040h
-    pshufd     xmm3, xmm3, 044h
-    pshuflw    xmm4, xmm4, 040h
-    pshufd     xmm4, xmm4, 044h
-    pxor       xmm5, xmm5  // constant 0
-    pcmpeqb    xmm6, xmm6  // generate mask 0xff000000
-    pslld      xmm6, 24
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]  // read 4 pixels
-    punpcklbw  xmm0, xmm5   // first 2 pixels
-    pmulhuw    xmm0, xmm2   // pixel * scale >> 16
-    movdqa     xmm1, [eax]  // read 4 pixels
-    punpckhbw  xmm1, xmm5   // next 2 pixels
-    pmulhuw    xmm1, xmm2
-    pmullw     xmm0, xmm3   // * interval_size
-    movdqa     xmm7, [eax]  // read 4 pixels
-    pmullw     xmm1, xmm3
-    pand       xmm7, xmm6   // mask alpha
-    paddw      xmm0, xmm4   // + interval_size / 2
-    paddw      xmm1, xmm4
-    packuswb   xmm0, xmm1
-    por        xmm0, xmm7
-    sub        ecx, 4
-    movdqa     [eax], xmm0
-    lea        eax, [eax + 16]
-    jg         convertloop
-    ret
-  }
-}
-#endif  // HAS_ARGBQUANTIZEROW_SSE2
-
-#ifdef HAS_ARGBSHADEROW_SSE2
-// Shade 4 pixels at a time by specified value.
-// Aligned to 16 bytes.
-__declspec(naked) __declspec(align(16))
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
-                       uint32 value) {
-  __asm {
-    mov        eax, [esp + 4]   // src_argb
-    mov        edx, [esp + 8]   // dst_argb
-    mov        ecx, [esp + 12]  // width
-    movd       xmm2, [esp + 16]  // value
-    punpcklbw  xmm2, xmm2
-    punpcklqdq xmm2, xmm2
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]      // read 4 pixels
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm0       // first 2
-    punpckhbw  xmm1, xmm1       // next 2
-    pmulhuw    xmm0, xmm2       // argb * value
-    pmulhuw    xmm1, xmm2       // argb * value
-    psrlw      xmm0, 8
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    ret
-  }
-}
-#endif  // HAS_ARGBSHADEROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_SSE2
-// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-    pxor       xmm5, xmm5  // constant 0
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]        // read 4 pixels from src_argb0
-    movdqu     xmm2, [esi]        // read 4 pixels from src_argb1
-    movdqu     xmm1, xmm0
-    movdqu     xmm3, xmm2
-    punpcklbw  xmm0, xmm0         // first 2
-    punpckhbw  xmm1, xmm1         // next 2
-    punpcklbw  xmm2, xmm5         // first 2
-    punpckhbw  xmm3, xmm5         // next 2
-    pmulhuw    xmm0, xmm2         // src_argb0 * src_argb1 first 2
-    pmulhuw    xmm1, xmm3         // src_argb0 * src_argb1 next 2
-    lea        eax, [eax + 16]
-    lea        esi, [esi + 16]
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBMULTIPLYROW_SSE2
-
-#ifdef HAS_ARGBADDROW_SSE2
-// Add 2 rows of ARGB pixels together, 4 pixels at a time.
-// TODO(fbarchard): Port this to posix, neon and other math functions.
-__declspec(naked) __declspec(align(16))
-void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                     uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-
-    sub        ecx, 4
-    jl         convertloop49
-
-    align      4
- convertloop4:
-    movdqu     xmm0, [eax]        // read 4 pixels from src_argb0
-    lea        eax, [eax + 16]
-    movdqu     xmm1, [esi]        // read 4 pixels from src_argb1
-    lea        esi, [esi + 16]
-    paddusb    xmm0, xmm1         // src_argb0 + src_argb1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jge        convertloop4
-
- convertloop49:
-    add        ecx, 4 - 1
-    jl         convertloop19
-
- convertloop1:
-    movd       xmm0, [eax]        // read 1 pixels from src_argb0
-    lea        eax, [eax + 4]
-    movd       xmm1, [esi]        // read 1 pixels from src_argb1
-    lea        esi, [esi + 4]
-    paddusb    xmm0, xmm1         // src_argb0 + src_argb1
-    sub        ecx, 1
-    movd       [edx], xmm0
-    lea        edx, [edx + 4]
-    jge        convertloop1
-
- convertloop19:
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBADDROW_SSE2
-
-#ifdef HAS_ARGBSUBTRACTROW_SSE2
-// Subtract 2 rows of ARGB pixels together, 4 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-
-    align      4
- convertloop:
-    movdqu     xmm0, [eax]        // read 4 pixels from src_argb0
-    lea        eax, [eax + 16]
-    movdqu     xmm1, [esi]        // read 4 pixels from src_argb1
-    lea        esi, [esi + 16]
-    psubusb    xmm0, xmm1         // src_argb0 - src_argb1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBSUBTRACTROW_SSE2
-
-#ifdef HAS_ARGBMULTIPLYROW_AVX2
-// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-    vpxor      ymm5, ymm5, ymm5     // constant 0
-
-    align      4
- convertloop:
-    vmovdqu    ymm1, [eax]        // read 8 pixels from src_argb0
-    lea        eax, [eax + 32]
-    vmovdqu    ymm3, [esi]        // read 8 pixels from src_argb1
-    lea        esi, [esi + 32]
-    vpunpcklbw ymm0, ymm1, ymm1   // low 4
-    vpunpckhbw ymm1, ymm1, ymm1   // high 4
-    vpunpcklbw ymm2, ymm3, ymm5   // low 4
-    vpunpckhbw ymm3, ymm3, ymm5   // high 4
-    vpmulhuw   ymm0, ymm0, ymm2   // src_argb0 * src_argb1 low 4
-    vpmulhuw   ymm1, ymm1, ymm3   // src_argb0 * src_argb1 high 4
-    vpackuswb  ymm0, ymm0, ymm1
-    vmovdqu    [edx], ymm0
-    lea        edx, [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBMULTIPLYROW_AVX2
-
-#ifdef HAS_ARGBADDROW_AVX2
-// Add 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
-                     uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-
-    align      4
- convertloop:
-    vmovdqu    ymm0, [eax]              // read 8 pixels from src_argb0
-    lea        eax, [eax + 32]
-    vpaddusb   ymm0, ymm0, [esi]        // add 8 pixels from src_argb1
-    lea        esi, [esi + 32]
-    vmovdqu    [edx], ymm0
-    lea        edx, [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBADDROW_AVX2
-
-#ifdef HAS_ARGBSUBTRACTROW_AVX2
-// Subtract 2 rows of ARGB pixels together, 8 pixels at a time.
-__declspec(naked) __declspec(align(16))
-void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
-                          uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_argb0
-    mov        esi, [esp + 4 + 8]   // src_argb1
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-
-    align      4
- convertloop:
-    vmovdqu    ymm0, [eax]              // read 8 pixels from src_argb0
-    lea        eax, [eax + 32]
-    vpsubusb   ymm0, ymm0, [esi]        // src_argb0 - src_argb1
-    lea        esi, [esi + 32]
-    vmovdqu    [edx], ymm0
-    lea        edx, [edx + 32]
-    sub        ecx, 8
-    jg         convertloop
-
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBSUBTRACTROW_AVX2
-
-#ifdef HAS_SOBELXROW_SSE2
-// SobelX as a matrix is
-// -1  0  1
-// -2  0  2
-// -1  0  1
-__declspec(naked) __declspec(align(16))
-void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobelx, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   // src_y0
-    mov        esi, [esp + 8 + 8]   // src_y1
-    mov        edi, [esp + 8 + 12]  // src_y2
-    mov        edx, [esp + 8 + 16]  // dst_sobelx
-    mov        ecx, [esp + 8 + 20]  // width
-    sub        esi, eax
-    sub        edi, eax
-    sub        edx, eax
-    pxor       xmm5, xmm5  // constant 0
-
-    align      4
- convertloop:
-    movq       xmm0, qword ptr [eax]            // read 8 pixels from src_y0[0]
-    movq       xmm1, qword ptr [eax + 2]        // read 8 pixels from src_y0[2]
-    punpcklbw  xmm0, xmm5
-    punpcklbw  xmm1, xmm5
-    psubw      xmm0, xmm1
-    movq       xmm1, qword ptr [eax + esi]      // read 8 pixels from src_y1[0]
-    movq       xmm2, qword ptr [eax + esi + 2]  // read 8 pixels from src_y1[2]
-    punpcklbw  xmm1, xmm5
-    punpcklbw  xmm2, xmm5
-    psubw      xmm1, xmm2
-    movq       xmm2, qword ptr [eax + edi]      // read 8 pixels from src_y2[0]
-    movq       xmm3, qword ptr [eax + edi + 2]  // read 8 pixels from src_y2[2]
-    punpcklbw  xmm2, xmm5
-    punpcklbw  xmm3, xmm5
-    psubw      xmm2, xmm3
-    paddw      xmm0, xmm2
-    paddw      xmm0, xmm1
-    paddw      xmm0, xmm1
-    pxor       xmm1, xmm1   // abs = max(xmm0, -xmm0).  SSSE3 could use pabsw
-    psubw      xmm1, xmm0
-    pmaxsw     xmm0, xmm1
-    packuswb   xmm0, xmm0
-    sub        ecx, 8
-    movq       qword ptr [eax + edx], xmm0
-    lea        eax, [eax + 8]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_SOBELXROW_SSE2
-
-#ifdef HAS_SOBELYROW_SSE2
-// SobelY as a matrix is
-// -1 -2 -1
-//  0  0  0
-//  1  2  1
-__declspec(naked) __declspec(align(16))
-void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_y0
-    mov        esi, [esp + 4 + 8]   // src_y1
-    mov        edx, [esp + 4 + 12]  // dst_sobely
-    mov        ecx, [esp + 4 + 16]  // width
-    sub        esi, eax
-    sub        edx, eax
-    pxor       xmm5, xmm5  // constant 0
-
-    align      4
- convertloop:
-    movq       xmm0, qword ptr [eax]            // read 8 pixels from src_y0[0]
-    movq       xmm1, qword ptr [eax + esi]      // read 8 pixels from src_y1[0]
-    punpcklbw  xmm0, xmm5
-    punpcklbw  xmm1, xmm5
-    psubw      xmm0, xmm1
-    movq       xmm1, qword ptr [eax + 1]        // read 8 pixels from src_y0[1]
-    movq       xmm2, qword ptr [eax + esi + 1]  // read 8 pixels from src_y1[1]
-    punpcklbw  xmm1, xmm5
-    punpcklbw  xmm2, xmm5
-    psubw      xmm1, xmm2
-    movq       xmm2, qword ptr [eax + 2]        // read 8 pixels from src_y0[2]
-    movq       xmm3, qword ptr [eax + esi + 2]  // read 8 pixels from src_y1[2]
-    punpcklbw  xmm2, xmm5
-    punpcklbw  xmm3, xmm5
-    psubw      xmm2, xmm3
-    paddw      xmm0, xmm2
-    paddw      xmm0, xmm1
-    paddw      xmm0, xmm1
-    pxor       xmm1, xmm1   // abs = max(xmm0, -xmm0).  SSSE3 could use pabsw
-    psubw      xmm1, xmm0
-    pmaxsw     xmm0, xmm1
-    packuswb   xmm0, xmm0
-    sub        ecx, 8
-    movq       qword ptr [eax + edx], xmm0
-    lea        eax, [eax + 8]
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_SOBELYROW_SSE2
-
-#ifdef HAS_SOBELROW_SSE2
-// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
-// A = 255
-// R = Sobel
-// G = Sobel
-// B = Sobel
-__declspec(naked) __declspec(align(16))
-void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                   uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_sobelx
-    mov        esi, [esp + 4 + 8]   // src_sobely
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-    sub        esi, eax
-    pcmpeqb    xmm5, xmm5           // alpha 255
-    pslld      xmm5, 24             // 0xff000000
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]            // read 16 pixels src_sobelx
-    movdqa     xmm1, [eax + esi]      // read 16 pixels src_sobely
-    lea        eax, [eax + 16]
-    paddusb    xmm0, xmm1             // sobel = sobelx + sobely
-    movdqa     xmm2, xmm0             // GG
-    punpcklbw  xmm2, xmm0             // First 8
-    punpckhbw  xmm0, xmm0             // Next 8
-    movdqa     xmm1, xmm2             // GGGG
-    punpcklwd  xmm1, xmm2             // First 4
-    punpckhwd  xmm2, xmm2             // Next 4
-    por        xmm1, xmm5             // GGGA
-    por        xmm2, xmm5
-    movdqa     xmm3, xmm0             // GGGG
-    punpcklwd  xmm3, xmm0             // Next 4
-    punpckhwd  xmm0, xmm0             // Last 4
-    por        xmm3, xmm5             // GGGA
-    por        xmm0, xmm5
-    sub        ecx, 16
-    movdqa     [edx], xmm1
-    movdqa     [edx + 16], xmm2
-    movdqa     [edx + 32], xmm3
-    movdqa     [edx + 48], xmm0
-    lea        edx, [edx + 64]
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_SOBELROW_SSE2
-
-#ifdef HAS_SOBELTOPLANEROW_SSE2
-// Adds Sobel X and Sobel Y and stores Sobel into a plane.
-__declspec(naked) __declspec(align(16))
-void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                          uint8* dst_y, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_sobelx
-    mov        esi, [esp + 4 + 8]   // src_sobely
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-    sub        esi, eax
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]            // read 16 pixels src_sobelx
-    movdqa     xmm1, [eax + esi]      // read 16 pixels src_sobely
-    lea        eax, [eax + 16]
-    paddusb    xmm0, xmm1             // sobel = sobelx + sobely
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_SOBELTOPLANEROW_SSE2
-
-#ifdef HAS_SOBELXYROW_SSE2
-// Mixes Sobel X, Sobel Y and Sobel into ARGB.
-// A = 255
-// R = Sobel X
-// G = Sobel
-// B = Sobel Y
-__declspec(naked) __declspec(align(16))
-void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
-                     uint8* dst_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   // src_sobelx
-    mov        esi, [esp + 4 + 8]   // src_sobely
-    mov        edx, [esp + 4 + 12]  // dst_argb
-    mov        ecx, [esp + 4 + 16]  // width
-    sub        esi, eax
-    pcmpeqb    xmm5, xmm5           // alpha 255
-
-    align      4
- convertloop:
-    movdqa     xmm0, [eax]            // read 16 pixels src_sobelx
-    movdqa     xmm1, [eax + esi]      // read 16 pixels src_sobely
-    lea        eax, [eax + 16]
-    movdqa     xmm2, xmm0
-    paddusb    xmm2, xmm1             // sobel = sobelx + sobely
-    movdqa     xmm3, xmm0             // XA
-    punpcklbw  xmm3, xmm5
-    punpckhbw  xmm0, xmm5
-    movdqa     xmm4, xmm1             // YS
-    punpcklbw  xmm4, xmm2
-    punpckhbw  xmm1, xmm2
-    movdqa     xmm6, xmm4             // YSXA
-    punpcklwd  xmm6, xmm3             // First 4
-    punpckhwd  xmm4, xmm3             // Next 4
-    movdqa     xmm7, xmm1             // YSXA
-    punpcklwd  xmm7, xmm0             // Next 4
-    punpckhwd  xmm1, xmm0             // Last 4
-    sub        ecx, 16
-    movdqa     [edx], xmm6
-    movdqa     [edx + 16], xmm4
-    movdqa     [edx + 32], xmm7
-    movdqa     [edx + 48], xmm1
-    lea        edx, [edx + 64]
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_SOBELXYROW_SSE2
-
-#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-// Consider float CumulativeSum.
-// Consider calling CumulativeSum one row at time as needed.
-// Consider circular CumulativeSum buffer of radius * 2 + 1 height.
-// Convert cumulative sum for an area to an average for 1 pixel.
-// topleft is pointer to top left of CumulativeSum buffer for area.
-// botleft is pointer to bottom left of CumulativeSum buffer.
-// width is offset from left to right of area in CumulativeSum buffer measured
-//   in number of ints.
-// area is the number of pixels in the area being averaged.
-// dst points to pixel to store result to.
-// count is number of averaged pixels to produce.
-// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte
-// aligned.
-void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
-                                    int width, int area, uint8* dst,
-                                    int count) {
-  __asm {
-    mov        eax, topleft  // eax topleft
-    mov        esi, botleft  // esi botleft
-    mov        edx, width
-    movd       xmm5, area
-    mov        edi, dst
-    mov        ecx, count
-    cvtdq2ps   xmm5, xmm5
-    rcpss      xmm4, xmm5  // 1.0f / area
-    pshufd     xmm4, xmm4, 0
-    sub        ecx, 4
-    jl         l4b
-
-    cmp        area, 128  // 128 pixels will not overflow 15 bits.
-    ja         l4
-
-    pshufd     xmm5, xmm5, 0        // area
-    pcmpeqb    xmm6, xmm6           // constant of 65536.0 - 1 = 65535.0
-    psrld      xmm6, 16
-    cvtdq2ps   xmm6, xmm6
-    addps      xmm5, xmm6           // (65536.0 + area - 1)
-    mulps      xmm5, xmm4           // (65536.0 + area - 1) * 1 / area
-    cvtps2dq   xmm5, xmm5           // 0.16 fixed point
-    packssdw   xmm5, xmm5           // 16 bit shorts
-
-    // 4 pixel loop small blocks.
-    align      4
-  s4:
-    // top left
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-
-    // - top right
-    psubd      xmm0, [eax + edx * 4]
-    psubd      xmm1, [eax + edx * 4 + 16]
-    psubd      xmm2, [eax + edx * 4 + 32]
-    psubd      xmm3, [eax + edx * 4 + 48]
-    lea        eax, [eax + 64]
-
-    // - bottom left
-    psubd      xmm0, [esi]
-    psubd      xmm1, [esi + 16]
-    psubd      xmm2, [esi + 32]
-    psubd      xmm3, [esi + 48]
-
-    // + bottom right
-    paddd      xmm0, [esi + edx * 4]
-    paddd      xmm1, [esi + edx * 4 + 16]
-    paddd      xmm2, [esi + edx * 4 + 32]
-    paddd      xmm3, [esi + edx * 4 + 48]
-    lea        esi, [esi + 64]
-
-    packssdw   xmm0, xmm1  // pack 4 pixels into 2 registers
-    packssdw   xmm2, xmm3
-
-    pmulhuw    xmm0, xmm5
-    pmulhuw    xmm2, xmm5
-
-    packuswb   xmm0, xmm2
-    movdqu     [edi], xmm0
-    lea        edi, [edi + 16]
-    sub        ecx, 4
-    jge        s4
-
-    jmp        l4b
-
-    // 4 pixel loop
-    align      4
-  l4:
-    // top left
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + 32]
-    movdqa     xmm3, [eax + 48]
-
-    // - top right
-    psubd      xmm0, [eax + edx * 4]
-    psubd      xmm1, [eax + edx * 4 + 16]
-    psubd      xmm2, [eax + edx * 4 + 32]
-    psubd      xmm3, [eax + edx * 4 + 48]
-    lea        eax, [eax + 64]
-
-    // - bottom left
-    psubd      xmm0, [esi]
-    psubd      xmm1, [esi + 16]
-    psubd      xmm2, [esi + 32]
-    psubd      xmm3, [esi + 48]
-
-    // + bottom right
-    paddd      xmm0, [esi + edx * 4]
-    paddd      xmm1, [esi + edx * 4 + 16]
-    paddd      xmm2, [esi + edx * 4 + 32]
-    paddd      xmm3, [esi + edx * 4 + 48]
-    lea        esi, [esi + 64]
-
-    cvtdq2ps   xmm0, xmm0   // Average = Sum * 1 / Area
-    cvtdq2ps   xmm1, xmm1
-    mulps      xmm0, xmm4
-    mulps      xmm1, xmm4
-    cvtdq2ps   xmm2, xmm2
-    cvtdq2ps   xmm3, xmm3
-    mulps      xmm2, xmm4
-    mulps      xmm3, xmm4
-    cvtps2dq   xmm0, xmm0
-    cvtps2dq   xmm1, xmm1
-    cvtps2dq   xmm2, xmm2
-    cvtps2dq   xmm3, xmm3
-    packssdw   xmm0, xmm1
-    packssdw   xmm2, xmm3
-    packuswb   xmm0, xmm2
-    movdqu     [edi], xmm0
-    lea        edi, [edi + 16]
-    sub        ecx, 4
-    jge        l4
-
-  l4b:
-    add        ecx, 4 - 1
-    jl         l1b
-
-    // 1 pixel loop
-    align      4
-  l1:
-    movdqa     xmm0, [eax]
-    psubd      xmm0, [eax + edx * 4]
-    lea        eax, [eax + 16]
-    psubd      xmm0, [esi]
-    paddd      xmm0, [esi + edx * 4]
-    lea        esi, [esi + 16]
-    cvtdq2ps   xmm0, xmm0
-    mulps      xmm0, xmm4
-    cvtps2dq   xmm0, xmm0
-    packssdw   xmm0, xmm0
-    packuswb   xmm0, xmm0
-    movd       dword ptr [edi], xmm0
-    lea        edi, [edi + 4]
-    sub        ecx, 1
-    jge        l1
-  l1b:
-  }
-}
-#endif  // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-
-#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
-// Creates a table of cumulative sums where each value is a sum of all values
-// above and to the left of the value.
-void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
-                                  const int32* previous_cumsum, int width) {
-  __asm {
-    mov        eax, row
-    mov        edx, cumsum
-    mov        esi, previous_cumsum
-    mov        ecx, width
-    pxor       xmm0, xmm0
-    pxor       xmm1, xmm1
-
-    sub        ecx, 4
-    jl         l4b
-    test       edx, 15
-    jne        l4b
-
-    // 4 pixel loop
-    align      4
-  l4:
-    movdqu     xmm2, [eax]  // 4 argb pixels 16 bytes.
-    lea        eax, [eax + 16]
-    movdqa     xmm4, xmm2
-
-    punpcklbw  xmm2, xmm1
-    movdqa     xmm3, xmm2
-    punpcklwd  xmm2, xmm1
-    punpckhwd  xmm3, xmm1
-
-    punpckhbw  xmm4, xmm1
-    movdqa     xmm5, xmm4
-    punpcklwd  xmm4, xmm1
-    punpckhwd  xmm5, xmm1
-
-    paddd      xmm0, xmm2
-    movdqa     xmm2, [esi]  // previous row above.
-    paddd      xmm2, xmm0
-
-    paddd      xmm0, xmm3
-    movdqa     xmm3, [esi + 16]
-    paddd      xmm3, xmm0
-
-    paddd      xmm0, xmm4
-    movdqa     xmm4, [esi + 32]
-    paddd      xmm4, xmm0
-
-    paddd      xmm0, xmm5
-    movdqa     xmm5, [esi + 48]
-    lea        esi, [esi + 64]
-    paddd      xmm5, xmm0
-
-    movdqa     [edx], xmm2
-    movdqa     [edx + 16], xmm3
-    movdqa     [edx + 32], xmm4
-    movdqa     [edx + 48], xmm5
-
-    lea        edx, [edx + 64]
-    sub        ecx, 4
-    jge        l4
-
-  l4b:
-    add        ecx, 4 - 1
-    jl         l1b
-
-    // 1 pixel loop
-    align      4
-  l1:
-    movd       xmm2, dword ptr [eax]  // 1 argb pixel 4 bytes.
-    lea        eax, [eax + 4]
-    punpcklbw  xmm2, xmm1
-    punpcklwd  xmm2, xmm1
-    paddd      xmm0, xmm2
-    movdqu     xmm2, [esi]
-    lea        esi, [esi + 16]
-    paddd      xmm2, xmm0
-    movdqu     [edx], xmm2
-    lea        edx, [edx + 16]
-    sub        ecx, 1
-    jge        l1
-
- l1b:
-  }
-}
-#endif  // HAS_COMPUTECUMULATIVESUMROW_SSE2
-
-#ifdef HAS_ARGBAFFINEROW_SSE2
-// Copy ARGB pixels from source image with slope to a row of destination.
-__declspec(naked) __declspec(align(16))
-LIBYUV_API
-void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
-                        uint8* dst_argb, const float* uv_dudv, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 12]  // src_argb
-    mov        esi, [esp + 16]  // stride
-    mov        edx, [esp + 20]  // dst_argb
-    mov        ecx, [esp + 24]  // pointer to uv_dudv
-    movq       xmm2, qword ptr [ecx]  // uv
-    movq       xmm7, qword ptr [ecx + 8]  // dudv
-    mov        ecx, [esp + 28]  // width
-    shl        esi, 16          // 4, stride
-    add        esi, 4
-    movd       xmm5, esi
-    sub        ecx, 4
-    jl         l4b
-
-    // setup for 4 pixel loop
-    pshufd     xmm7, xmm7, 0x44  // dup dudv
-    pshufd     xmm5, xmm5, 0  // dup 4, stride
-    movdqa     xmm0, xmm2    // x0, y0, x1, y1
-    addps      xmm0, xmm7
-    movlhps    xmm2, xmm0
-    movdqa     xmm4, xmm7
-    addps      xmm4, xmm4    // dudv *= 2
-    movdqa     xmm3, xmm2    // x2, y2, x3, y3
-    addps      xmm3, xmm4
-    addps      xmm4, xmm4    // dudv *= 4
-
-    // 4 pixel loop
-    align      4
-  l4:
-    cvttps2dq  xmm0, xmm2    // x, y float to int first 2
-    cvttps2dq  xmm1, xmm3    // x, y float to int next 2
-    packssdw   xmm0, xmm1    // x, y as 8 shorts
-    pmaddwd    xmm0, xmm5    // offsets = x * 4 + y * stride.
-    movd       esi, xmm0
-    pshufd     xmm0, xmm0, 0x39  // shift right
-    movd       edi, xmm0
-    pshufd     xmm0, xmm0, 0x39  // shift right
-    movd       xmm1, [eax + esi]  // read pixel 0
-    movd       xmm6, [eax + edi]  // read pixel 1
-    punpckldq  xmm1, xmm6     // combine pixel 0 and 1
-    addps      xmm2, xmm4    // x, y += dx, dy first 2
-    movq       qword ptr [edx], xmm1
-    movd       esi, xmm0
-    pshufd     xmm0, xmm0, 0x39  // shift right
-    movd       edi, xmm0
-    movd       xmm6, [eax + esi]  // read pixel 2
-    movd       xmm0, [eax + edi]  // read pixel 3
-    punpckldq  xmm6, xmm0     // combine pixel 2 and 3
-    addps      xmm3, xmm4    // x, y += dx, dy next 2
-    sub        ecx, 4
-    movq       qword ptr 8[edx], xmm6
-    lea        edx, [edx + 16]
-    jge        l4
-
-  l4b:
-    add        ecx, 4 - 1
-    jl         l1b
-
-    // 1 pixel loop
-    align      4
-  l1:
-    cvttps2dq  xmm0, xmm2    // x, y float to int
-    packssdw   xmm0, xmm0    // x, y as shorts
-    pmaddwd    xmm0, xmm5    // offset = x * 4 + y * stride
-    addps      xmm2, xmm7    // x, y += dx, dy
-    movd       esi, xmm0
-    movd       xmm0, [eax + esi]  // copy a pixel
-    sub        ecx, 1
-    movd       [edx], xmm0
-    lea        edx, [edx + 4]
-    jge        l1
-  l1b:
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBAFFINEROW_SSE2
-
-#ifdef HAS_INTERPOLATEROW_AVX2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
-                          ptrdiff_t src_stride, int dst_width,
-                          int source_y_fraction) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]   // dst_ptr
-    mov        esi, [esp + 8 + 8]   // src_ptr
-    mov        edx, [esp + 8 + 12]  // src_stride
-    mov        ecx, [esp + 8 + 16]  // dst_width
-    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    shr        eax, 1
-    // Dispatch to specialized filters if applicable.
-    cmp        eax, 0
-    je         xloop100  // 0 / 128.  Blend 100 / 0.
-    sub        edi, esi
-    cmp        eax, 32
-    je         xloop75   // 32 / 128 is 0.25.  Blend 75 / 25.
-    cmp        eax, 64
-    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
-    cmp        eax, 96
-    je         xloop25   // 96 / 128 is 0.75.  Blend 25 / 75.
-
-    vmovd      xmm0, eax  // high fraction 0..127
-    neg        eax
-    add        eax, 128
-    vmovd      xmm5, eax  // low fraction 128..1
-    vpunpcklbw xmm5, xmm5, xmm0
-    vpunpcklwd xmm5, xmm5, xmm5
-    vpxor      ymm0, ymm0, ymm0
-    vpermd     ymm5, ymm0, ymm5
-
-    align      4
-  xloop:
-    vmovdqu    ymm0, [esi]
-    vmovdqu    ymm2, [esi + edx]
-    vpunpckhbw ymm1, ymm0, ymm2  // mutates
-    vpunpcklbw ymm0, ymm0, ymm2  // mutates
-    vpmaddubsw ymm0, ymm0, ymm5
-    vpmaddubsw ymm1, ymm1, ymm5
-    vpsrlw     ymm0, ymm0, 7
-    vpsrlw     ymm1, ymm1, 7
-    vpackuswb  ymm0, ymm0, ymm1  // unmutates
-    sub        ecx, 32
-    vmovdqu    [esi + edi], ymm0
-    lea        esi, [esi + 32]
-    jg         xloop
-    jmp        xloop99
-
-    // Blend 25 / 75.
-    align      4
-  xloop25:
-    vmovdqu    ymm0, [esi]
-    vpavgb     ymm0, ymm0, [esi + edx]
-    vpavgb     ymm0, ymm0, [esi + edx]
-    sub        ecx, 32
-    vmovdqu    [esi + edi], ymm0
-    lea        esi, [esi + 32]
-    jg         xloop25
-    jmp        xloop99
-
-    // Blend 50 / 50.
-    align      4
-  xloop50:
-    vmovdqu    ymm0, [esi]
-    vpavgb     ymm0, ymm0, [esi + edx]
-    sub        ecx, 32
-    vmovdqu    [esi + edi], ymm0
-    lea        esi, [esi + 32]
-    jg         xloop50
-    jmp        xloop99
-
-    // Blend 75 / 25.
-    align      4
-  xloop75:
-    vmovdqu    ymm0, [esi + edx]
-    vpavgb     ymm0, ymm0, [esi]
-    vpavgb     ymm0, ymm0, [esi]
-    sub        ecx, 32
-    vmovdqu     [esi + edi], ymm0
-    lea        esi, [esi + 32]
-    jg         xloop75
-    jmp        xloop99
-
-    // Blend 100 / 0 - Copy row unchanged.
-    align      4
-  xloop100:
-    rep movsb
-
-  xloop99:
-    pop        edi
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_INTERPOLATEROW_AVX2
-
-#ifdef HAS_INTERPOLATEROW_SSSE3
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                          ptrdiff_t src_stride, int dst_width,
-                          int source_y_fraction) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]   // dst_ptr
-    mov        esi, [esp + 8 + 8]   // src_ptr
-    mov        edx, [esp + 8 + 12]  // src_stride
-    mov        ecx, [esp + 8 + 16]  // dst_width
-    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    sub        edi, esi
-    shr        eax, 1
-    // Dispatch to specialized filters if applicable.
-    cmp        eax, 0
-    je         xloop100  // 0 / 128.  Blend 100 / 0.
-    cmp        eax, 32
-    je         xloop75   // 32 / 128 is 0.25.  Blend 75 / 25.
-    cmp        eax, 64
-    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
-    cmp        eax, 96
-    je         xloop25   // 96 / 128 is 0.75.  Blend 25 / 75.
-
-    movd       xmm0, eax  // high fraction 0..127
-    neg        eax
-    add        eax, 128
-    movd       xmm5, eax  // low fraction 128..1
-    punpcklbw  xmm5, xmm0
-    punpcklwd  xmm5, xmm5
-    pshufd     xmm5, xmm5, 0
-
-    align      4
-  xloop:
-    movdqa     xmm0, [esi]
-    movdqa     xmm2, [esi + edx]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm2
-    punpckhbw  xmm1, xmm2
-    pmaddubsw  xmm0, xmm5
-    pmaddubsw  xmm1, xmm5
-    psrlw      xmm0, 7
-    psrlw      xmm1, 7
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop
-    jmp        xloop99
-
-    // Blend 25 / 75.
-    align      4
-  xloop25:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop25
-    jmp        xloop99
-
-    // Blend 50 / 50.
-    align      4
-  xloop50:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop50
-    jmp        xloop99
-
-    // Blend 75 / 25.
-    align      4
-  xloop75:
-    movdqa     xmm1, [esi]
-    movdqa     xmm0, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop75
-    jmp        xloop99
-
-    // Blend 100 / 0 - Copy row unchanged.
-    align      4
-  xloop100:
-    movdqa     xmm0, [esi]
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop100
-
-  xloop99:
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_INTERPOLATEROW_SSSE3
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                         ptrdiff_t src_stride, int dst_width,
-                         int source_y_fraction) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]   // dst_ptr
-    mov        esi, [esp + 8 + 8]   // src_ptr
-    mov        edx, [esp + 8 + 12]  // src_stride
-    mov        ecx, [esp + 8 + 16]  // dst_width
-    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    sub        edi, esi
-    // Dispatch to specialized filters if applicable.
-    cmp        eax, 0
-    je         xloop100  // 0 / 256.  Blend 100 / 0.
-    cmp        eax, 64
-    je         xloop75   // 64 / 256 is 0.25.  Blend 75 / 25.
-    cmp        eax, 128
-    je         xloop50   // 128 / 256 is 0.50.  Blend 50 / 50.
-    cmp        eax, 192
-    je         xloop25   // 192 / 256 is 0.75.  Blend 25 / 75.
-
-    movd       xmm5, eax            // xmm5 = y fraction
-    punpcklbw  xmm5, xmm5
-    psrlw      xmm5, 1
-    punpcklwd  xmm5, xmm5
-    punpckldq  xmm5, xmm5
-    punpcklqdq xmm5, xmm5
-    pxor       xmm4, xmm4
-
-    align      4
-  xloop:
-    movdqa     xmm0, [esi]  // row0
-    movdqa     xmm2, [esi + edx]  // row1
-    movdqa     xmm1, xmm0
-    movdqa     xmm3, xmm2
-    punpcklbw  xmm2, xmm4
-    punpckhbw  xmm3, xmm4
-    punpcklbw  xmm0, xmm4
-    punpckhbw  xmm1, xmm4
-    psubw      xmm2, xmm0  // row1 - row0
-    psubw      xmm3, xmm1
-    paddw      xmm2, xmm2  // 9 bits * 15 bits = 8.16
-    paddw      xmm3, xmm3
-    pmulhw     xmm2, xmm5  // scale diff
-    pmulhw     xmm3, xmm5
-    paddw      xmm0, xmm2  // sum rows
-    paddw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop
-    jmp        xloop99
-
-    // Blend 25 / 75.
-    align      4
-  xloop25:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop25
-    jmp        xloop99
-
-    // Blend 50 / 50.
-    align      4
-  xloop50:
-    movdqa     xmm0, [esi]
-    movdqa     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop50
-    jmp        xloop99
-
-    // Blend 75 / 25.
-    align      4
-  xloop75:
-    movdqa     xmm1, [esi]
-    movdqa     xmm0, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop75
-    jmp        xloop99
-
-    // Blend 100 / 0 - Copy row unchanged.
-    align      4
-  xloop100:
-    movdqa     xmm0, [esi]
-    sub        ecx, 16
-    movdqa     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop100
-
-  xloop99:
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_INTERPOLATEROW_SSE2
-
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                                    ptrdiff_t src_stride, int dst_width,
-                                    int source_y_fraction) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]   // dst_ptr
-    mov        esi, [esp + 8 + 8]   // src_ptr
-    mov        edx, [esp + 8 + 12]  // src_stride
-    mov        ecx, [esp + 8 + 16]  // dst_width
-    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    sub        edi, esi
-    shr        eax, 1
-    // Dispatch to specialized filters if applicable.
-    cmp        eax, 0
-    je         xloop100  // 0 / 128.  Blend 100 / 0.
-    cmp        eax, 32
-    je         xloop75   // 32 / 128 is 0.25.  Blend 75 / 25.
-    cmp        eax, 64
-    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
-    cmp        eax, 96
-    je         xloop25   // 96 / 128 is 0.75.  Blend 25 / 75.
-
-    movd       xmm0, eax  // high fraction 0..127
-    neg        eax
-    add        eax, 128
-    movd       xmm5, eax  // low fraction 128..1
-    punpcklbw  xmm5, xmm0
-    punpcklwd  xmm5, xmm5
-    pshufd     xmm5, xmm5, 0
-
-    align      4
-  xloop:
-    movdqu     xmm0, [esi]
-    movdqu     xmm2, [esi + edx]
-    movdqu     xmm1, xmm0
-    punpcklbw  xmm0, xmm2
-    punpckhbw  xmm1, xmm2
-    pmaddubsw  xmm0, xmm5
-    pmaddubsw  xmm1, xmm5
-    psrlw      xmm0, 7
-    psrlw      xmm1, 7
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop
-    jmp        xloop99
-
-    // Blend 25 / 75.
-    align      4
-  xloop25:
-    movdqu     xmm0, [esi]
-    movdqu     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop25
-    jmp        xloop99
-
-    // Blend 50 / 50.
-    align      4
-  xloop50:
-    movdqu     xmm0, [esi]
-    movdqu     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop50
-    jmp        xloop99
-
-    // Blend 75 / 25.
-    align      4
-  xloop75:
-    movdqu     xmm1, [esi]
-    movdqu     xmm0, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop75
-    jmp        xloop99
-
-    // Blend 100 / 0 - Copy row unchanged.
-    align      4
-  xloop100:
-    movdqu     xmm0, [esi]
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop100
-
-  xloop99:
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-#ifdef HAS_INTERPOLATEROW_SSE2
-// Bilinear filter 16x2 -> 16x1
-__declspec(naked) __declspec(align(16))
-void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                                   ptrdiff_t src_stride, int dst_width,
-                                   int source_y_fraction) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]   // dst_ptr
-    mov        esi, [esp + 8 + 8]   // src_ptr
-    mov        edx, [esp + 8 + 12]  // src_stride
-    mov        ecx, [esp + 8 + 16]  // dst_width
-    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    sub        edi, esi
-    // Dispatch to specialized filters if applicable.
-    cmp        eax, 0
-    je         xloop100  // 0 / 256.  Blend 100 / 0.
-    cmp        eax, 64
-    je         xloop75   // 64 / 256 is 0.25.  Blend 75 / 25.
-    cmp        eax, 128
-    je         xloop50   // 128 / 256 is 0.50.  Blend 50 / 50.
-    cmp        eax, 192
-    je         xloop25   // 192 / 256 is 0.75.  Blend 25 / 75.
-
-    movd       xmm5, eax            // xmm5 = y fraction
-    punpcklbw  xmm5, xmm5
-    psrlw      xmm5, 1
-    punpcklwd  xmm5, xmm5
-    punpckldq  xmm5, xmm5
-    punpcklqdq xmm5, xmm5
-    pxor       xmm4, xmm4
-
-    align      4
-  xloop:
-    movdqu     xmm0, [esi]  // row0
-    movdqu     xmm2, [esi + edx]  // row1
-    movdqu     xmm1, xmm0
-    movdqu     xmm3, xmm2
-    punpcklbw  xmm2, xmm4
-    punpckhbw  xmm3, xmm4
-    punpcklbw  xmm0, xmm4
-    punpckhbw  xmm1, xmm4
-    psubw      xmm2, xmm0  // row1 - row0
-    psubw      xmm3, xmm1
-    paddw      xmm2, xmm2  // 9 bits * 15 bits = 8.16
-    paddw      xmm3, xmm3
-    pmulhw     xmm2, xmm5  // scale diff
-    pmulhw     xmm3, xmm5
-    paddw      xmm0, xmm2  // sum rows
-    paddw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop
-    jmp        xloop99
-
-    // Blend 25 / 75.
-    align      4
-  xloop25:
-    movdqu     xmm0, [esi]
-    movdqu     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop25
-    jmp        xloop99
-
-    // Blend 50 / 50.
-    align      4
-  xloop50:
-    movdqu     xmm0, [esi]
-    movdqu     xmm1, [esi + edx]
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop50
-    jmp        xloop99
-
-    // Blend 75 / 25.
-    align      4
-  xloop75:
-    movdqu     xmm1, [esi]
-    movdqu     xmm0, [esi + edx]
-    pavgb      xmm0, xmm1
-    pavgb      xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop75
-    jmp        xloop99
-
-    // Blend 100 / 0 - Copy row unchanged.
-    align      4
-  xloop100:
-    movdqu     xmm0, [esi]
-    sub        ecx, 16
-    movdqu     [esi + edi], xmm0
-    lea        esi, [esi + 16]
-    jg         xloop100
-
-  xloop99:
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_INTERPOLATEROW_SSE2
-
-__declspec(naked) __declspec(align(16))
-void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // src_uv_stride
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    sub        edi, eax
-
-    align      4
-  convertloop:
-    movdqa     xmm0, [eax]
-    pavgb      xmm0, [eax + edx]
-    sub        ecx, 16
-    movdqa     [eax + edi], xmm0
-    lea        eax,  [eax + 16]
-    jg         convertloop
-    pop        edi
-    ret
-  }
-}
-
-#ifdef HAS_HALFROW_AVX2
-__declspec(naked) __declspec(align(16))
-void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
-                  uint8* dst_uv, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // src_uv_stride
-    mov        edi, [esp + 4 + 12]   // dst_v
-    mov        ecx, [esp + 4 + 16]   // pix
-    sub        edi, eax
-
-    align      4
-  convertloop:
-    vmovdqu    ymm0, [eax]
-    vpavgb     ymm0, ymm0, [eax + edx]
-    sub        ecx, 32
-    vmovdqu    [eax + edi], ymm0
-    lea        eax,  [eax + 32]
-    jg         convertloop
-
-    pop        edi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_HALFROW_AVX2
-
-__declspec(naked) __declspec(align(16))
-void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
-                          uint32 selector, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_argb
-    mov        edx, [esp + 8]    // dst_bayer
-    movd       xmm5, [esp + 12]  // selector
-    mov        ecx, [esp + 16]   // pix
-    pshufd     xmm5, xmm5, 0
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax, [eax + 32]
-    pshufb     xmm0, xmm5
-    pshufb     xmm1, xmm5
-    punpckldq  xmm0, xmm1
-    sub        ecx, 8
-    movq       qword ptr [edx], xmm0
-    lea        edx, [edx + 8]
-    jg         wloop
-    ret
-  }
-}
-
-// Specialized ARGB to Bayer that just isolates G channel.
-__declspec(naked) __declspec(align(16))
-void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
-                           uint32 selector, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_argb
-    mov        edx, [esp + 8]    // dst_bayer
-                                 // selector
-    mov        ecx, [esp + 16]   // pix
-    pcmpeqb    xmm5, xmm5        // generate mask 0x000000ff
-    psrld      xmm5, 24
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax, [eax + 32]
-    psrld      xmm0, 8  // Move green to bottom.
-    psrld      xmm1, 8
-    pand       xmm0, xmm5
-    pand       xmm1, xmm5
-    packssdw   xmm0, xmm1
-    packuswb   xmm0, xmm1
-    sub        ecx, 8
-    movq       qword ptr [edx], xmm0
-    lea        edx, [edx + 8]
-    jg         wloop
-    ret
-  }
-}
-
-// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                          const uint8* shuffler, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_argb
-    mov        edx, [esp + 8]    // dst_argb
-    mov        ecx, [esp + 12]   // shuffler
-    movdqa     xmm5, [ecx]
-    mov        ecx, [esp + 16]   // pix
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax, [eax + 32]
-    pshufb     xmm0, xmm5
-    pshufb     xmm1, xmm5
-    sub        ecx, 8
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    jg         wloop
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                    const uint8* shuffler, int pix) {
-  __asm {
-    mov        eax, [esp + 4]    // src_argb
-    mov        edx, [esp + 8]    // dst_argb
-    mov        ecx, [esp + 12]   // shuffler
-    movdqa     xmm5, [ecx]
-    mov        ecx, [esp + 16]   // pix
-
-    align      4
-  wloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax, [eax + 32]
-    pshufb     xmm0, xmm5
-    pshufb     xmm1, xmm5
-    sub        ecx, 8
-    movdqu     [edx], xmm0
-    movdqu     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    jg         wloop
-    ret
-  }
-}
-
-#ifdef HAS_ARGBSHUFFLEROW_AVX2
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix) {
-  __asm {
-    mov        eax, [esp + 4]     // src_argb
-    mov        edx, [esp + 8]     // dst_argb
-    mov        ecx, [esp + 12]    // shuffler
-    vbroadcastf128 ymm5, [ecx]    // same shuffle in high as low.
-    mov        ecx, [esp + 16]    // pix
-
-    align      4
-  wloop:
-    vmovdqu    ymm0, [eax]
-    vmovdqu    ymm1, [eax + 32]
-    lea        eax, [eax + 64]
-    vpshufb    ymm0, ymm0, ymm5
-    vpshufb    ymm1, ymm1, ymm5
-    sub        ecx, 16
-    vmovdqu    [edx], ymm0
-    vmovdqu    [edx + 32], ymm1
-    lea        edx, [edx + 64]
-    jg         wloop
-
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBSHUFFLEROW_AVX2
-
-__declspec(naked) __declspec(align(16))
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
-                         const uint8* shuffler, int pix) {
-  __asm {
-    push       ebx
-    push       esi
-    mov        eax, [esp + 8 + 4]    // src_argb
-    mov        edx, [esp + 8 + 8]    // dst_argb
-    mov        esi, [esp + 8 + 12]   // shuffler
-    mov        ecx, [esp + 8 + 16]   // pix
-    pxor       xmm5, xmm5
-
-    mov        ebx, [esi]   // shuffler
-    cmp        ebx, 0x03000102
-    je         shuf_3012
-    cmp        ebx, 0x00010203
-    je         shuf_0123
-    cmp        ebx, 0x00030201
-    je         shuf_0321
-    cmp        ebx, 0x02010003
-    je         shuf_2103
-
-  // TODO(fbarchard): Use one source pointer and 3 offsets.
-  shuf_any1:
-    movzx      ebx, byte ptr [esi]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx], bl
-    movzx      ebx, byte ptr [esi + 1]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx + 1], bl
-    movzx      ebx, byte ptr [esi + 2]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx + 2], bl
-    movzx      ebx, byte ptr [esi + 3]
-    movzx      ebx, byte ptr [eax + ebx]
-    mov        [edx + 3], bl
-    lea        eax, [eax + 4]
-    lea        edx, [edx + 4]
-    sub        ecx, 1
-    jg         shuf_any1
-    jmp        shuf99
-
-    align      4
-  shuf_0123:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 01Bh   // 1B = 00011011 = 0x0123 = BGRAToARGB
-    pshuflw    xmm0, xmm0, 01Bh
-    pshufhw    xmm1, xmm1, 01Bh
-    pshuflw    xmm1, xmm1, 01Bh
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         shuf_0123
-    jmp        shuf99
-
-    align      4
-  shuf_0321:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 039h   // 39 = 00111001 = 0x0321 = RGBAToARGB
-    pshuflw    xmm0, xmm0, 039h
-    pshufhw    xmm1, xmm1, 039h
-    pshuflw    xmm1, xmm1, 039h
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         shuf_0321
-    jmp        shuf99
-
-    align      4
-  shuf_2103:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 093h   // 93 = 10010011 = 0x2103 = ARGBToRGBA
-    pshuflw    xmm0, xmm0, 093h
-    pshufhw    xmm1, xmm1, 093h
-    pshuflw    xmm1, xmm1, 093h
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         shuf_2103
-    jmp        shuf99
-
-    align      4
-  shuf_3012:
-    movdqu     xmm0, [eax]
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm5
-    punpckhbw  xmm1, xmm5
-    pshufhw    xmm0, xmm0, 0C6h   // C6 = 11000110 = 0x3012 = ABGRToARGB
-    pshuflw    xmm0, xmm0, 0C6h
-    pshufhw    xmm1, xmm1, 0C6h
-    pshuflw    xmm1, xmm1, 0C6h
-    packuswb   xmm0, xmm1
-    sub        ecx, 4
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         shuf_3012
-
-  shuf99:
-    pop        esi
-    pop        ebx
-    ret
-  }
-}
-
-// YUY2 - Macro-pixel = 2 image pixels
-// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
-
-// UYVY - Macro-pixel = 2 image pixels
-// U0Y0V0Y1
-
-__declspec(naked) __declspec(align(16))
-void I422ToYUY2Row_SSE2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_frame, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_y
-    mov        esi, [esp + 8 + 8]    // src_u
-    mov        edx, [esp + 8 + 12]   // src_v
-    mov        edi, [esp + 8 + 16]   // dst_frame
-    mov        ecx, [esp + 8 + 20]   // width
-    sub        edx, esi
-
-    align      4
-  convertloop:
-    movq       xmm2, qword ptr [esi] // U
-    movq       xmm3, qword ptr [esi + edx] // V
-    lea        esi, [esi + 8]
-    punpcklbw  xmm2, xmm3 // UV
-    movdqu     xmm0, [eax] // Y
-    lea        eax, [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm2 // YUYV
-    punpckhbw  xmm1, xmm2
-    movdqu     [edi], xmm0
-    movdqu     [edi + 16], xmm1
-    lea        edi, [edi + 32]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-__declspec(naked) __declspec(align(16))
-void I422ToUYVYRow_SSE2(const uint8* src_y,
-                        const uint8* src_u,
-                        const uint8* src_v,
-                        uint8* dst_frame, int width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_y
-    mov        esi, [esp + 8 + 8]    // src_u
-    mov        edx, [esp + 8 + 12]   // src_v
-    mov        edi, [esp + 8 + 16]   // dst_frame
-    mov        ecx, [esp + 8 + 20]   // width
-    sub        edx, esi
-
-    align      4
-  convertloop:
-    movq       xmm2, qword ptr [esi] // U
-    movq       xmm3, qword ptr [esi + edx] // V
-    lea        esi, [esi + 8]
-    punpcklbw  xmm2, xmm3 // UV
-    movdqu     xmm0, [eax] // Y
-    movdqa     xmm1, xmm2
-    lea        eax, [eax + 16]
-    punpcklbw  xmm1, xmm0 // UYVY
-    punpckhbw  xmm2, xmm0
-    movdqu     [edi], xmm1
-    movdqu     [edi + 16], xmm2
-    lea        edi, [edi + 32]
-    sub        ecx, 16
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
-__declspec(naked) __declspec(align(16))
-void ARGBPolynomialRow_SSE2(const uint8* src_argb,
-                            uint8* dst_argb, const float* poly,
-                            int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   /* src_argb */
-    mov        edx, [esp + 4 + 8]   /* dst_argb */
-    mov        esi, [esp + 4 + 12]  /* poly */
-    mov        ecx, [esp + 4 + 16]  /* width */
-    pxor       xmm3, xmm3  // 0 constant for zero extending bytes to ints.
-
-    // 2 pixel loop.
-    align      4
- convertloop:
-//    pmovzxbd  xmm0, dword ptr [eax]  // BGRA pixel
-//    pmovzxbd  xmm4, dword ptr [eax + 4]  // BGRA pixel
-    movq       xmm0, qword ptr [eax]  // BGRABGRA
-    lea        eax, [eax + 8]
-    punpcklbw  xmm0, xmm3
-    movdqa     xmm4, xmm0
-    punpcklwd  xmm0, xmm3  // pixel 0
-    punpckhwd  xmm4, xmm3  // pixel 1
-    cvtdq2ps   xmm0, xmm0  // 4 floats
-    cvtdq2ps   xmm4, xmm4
-    movdqa     xmm1, xmm0  // X
-    movdqa     xmm5, xmm4
-    mulps      xmm0, [esi + 16]  // C1 * X
-    mulps      xmm4, [esi + 16]
-    addps      xmm0, [esi]  // result = C0 + C1 * X
-    addps      xmm4, [esi]
-    movdqa     xmm2, xmm1
-    movdqa     xmm6, xmm5
-    mulps      xmm2, xmm1  // X * X
-    mulps      xmm6, xmm5
-    mulps      xmm1, xmm2  // X * X * X
-    mulps      xmm5, xmm6
-    mulps      xmm2, [esi + 32]  // C2 * X * X
-    mulps      xmm6, [esi + 32]
-    mulps      xmm1, [esi + 48]  // C3 * X * X * X
-    mulps      xmm5, [esi + 48]
-    addps      xmm0, xmm2  // result += C2 * X * X
-    addps      xmm4, xmm6
-    addps      xmm0, xmm1  // result += C3 * X * X * X
-    addps      xmm4, xmm5
-    cvttps2dq  xmm0, xmm0
-    cvttps2dq  xmm4, xmm4
-    packuswb   xmm0, xmm4
-    packuswb   xmm0, xmm0
-    sub        ecx, 2
-    movq       qword ptr [edx], xmm0
-    lea        edx, [edx + 8]
-    jg         convertloop
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBPOLYNOMIALROW_SSE2
-
-#ifdef HAS_ARGBPOLYNOMIALROW_AVX2
-__declspec(naked) __declspec(align(16))
-void ARGBPolynomialRow_AVX2(const uint8* src_argb,
-                            uint8* dst_argb, const float* poly,
-                            int width) {
-  __asm {
-    mov        eax, [esp + 4]   /* src_argb */
-    mov        edx, [esp + 8]   /* dst_argb */
-    mov        ecx, [esp + 12]   /* poly */
-    vbroadcastf128 ymm4, [ecx]       // C0
-    vbroadcastf128 ymm5, [ecx + 16]  // C1
-    vbroadcastf128 ymm6, [ecx + 32]  // C2
-    vbroadcastf128 ymm7, [ecx + 48]  // C3
-    mov        ecx, [esp + 16]  /* width */
-
-    // 2 pixel loop.
-    align      4
- convertloop:
-    vpmovzxbd   ymm0, qword ptr [eax]  // 2 BGRA pixels
-    lea         eax, [eax + 8]
-    vcvtdq2ps   ymm0, ymm0        // X 8 floats
-    vmulps      ymm2, ymm0, ymm0  // X * X
-    vmulps      ymm3, ymm0, ymm7  // C3 * X
-    vfmadd132ps ymm0, ymm4, ymm5  // result = C0 + C1 * X
-    vfmadd231ps ymm0, ymm2, ymm6  // result += C2 * X * X
-    vfmadd231ps ymm0, ymm2, ymm3  // result += C3 * X * X * X
-    vcvttps2dq  ymm0, ymm0
-    vpackusdw   ymm0, ymm0, ymm0  // b0g0r0a0_00000000_b0g0r0a0_00000000
-    vpermq      ymm0, ymm0, 0xd8  // b0g0r0a0_b0g0r0a0_00000000_00000000
-    vpackuswb   xmm0, xmm0, xmm0  // bgrabgra_00000000_00000000_00000000
-    sub         ecx, 2
-    vmovq       qword ptr [edx], xmm0
-    lea         edx, [edx + 8]
-    jg          convertloop
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_ARGBPOLYNOMIALROW_AVX2
-
-#ifdef HAS_ARGBCOLORTABLEROW_X86
-// Tranform ARGB pixels with color table.
-__declspec(naked) __declspec(align(16))
-void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
-                           int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   /* dst_argb */
-    mov        esi, [esp + 4 + 8]   /* table_argb */
-    mov        ecx, [esp + 4 + 12]  /* width */
-
-    // 1 pixel loop.
-    align      4
-  convertloop:
-    movzx      edx, byte ptr [eax]
-    lea        eax, [eax + 4]
-    movzx      edx, byte ptr [esi + edx * 4]
-    mov        byte ptr [eax - 4], dl
-    movzx      edx, byte ptr [eax - 4 + 1]
-    movzx      edx, byte ptr [esi + edx * 4 + 1]
-    mov        byte ptr [eax - 4 + 1], dl
-    movzx      edx, byte ptr [eax - 4 + 2]
-    movzx      edx, byte ptr [esi + edx * 4 + 2]
-    mov        byte ptr [eax - 4 + 2], dl
-    movzx      edx, byte ptr [eax - 4 + 3]
-    movzx      edx, byte ptr [esi + edx * 4 + 3]
-    mov        byte ptr [eax - 4 + 3], dl
-    dec        ecx
-    jg         convertloop
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBCOLORTABLEROW_X86
-
-#ifdef HAS_RGBCOLORTABLEROW_X86
-// Tranform RGB pixels with color table.
-__declspec(naked) __declspec(align(16))
-void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]   /* dst_argb */
-    mov        esi, [esp + 4 + 8]   /* table_argb */
-    mov        ecx, [esp + 4 + 12]  /* width */
-
-    // 1 pixel loop.
-    align      4
-  convertloop:
-    movzx      edx, byte ptr [eax]
-    lea        eax, [eax + 4]
-    movzx      edx, byte ptr [esi + edx * 4]
-    mov        byte ptr [eax - 4], dl
-    movzx      edx, byte ptr [eax - 4 + 1]
-    movzx      edx, byte ptr [esi + edx * 4 + 1]
-    mov        byte ptr [eax - 4 + 1], dl
-    movzx      edx, byte ptr [eax - 4 + 2]
-    movzx      edx, byte ptr [esi + edx * 4 + 2]
-    mov        byte ptr [eax - 4 + 2], dl
-    dec        ecx
-    jg         convertloop
-
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_RGBCOLORTABLEROW_X86
-
-#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
-// Tranform RGB pixels with luma table.
-__declspec(naked) __declspec(align(16))
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                 int width,
-                                 const uint8* luma, uint32 lumacoeff) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]   /* src_argb */
-    mov        edi, [esp + 8 + 8]   /* dst_argb */
-    mov        ecx, [esp + 8 + 12]  /* width */
-    movd       xmm2, dword ptr [esp + 8 + 16]  // luma table
-    movd       xmm3, dword ptr [esp + 8 + 20]  // lumacoeff
-    pshufd     xmm2, xmm2, 0
-    pshufd     xmm3, xmm3, 0
-    pcmpeqb    xmm4, xmm4        // generate mask 0xff00ff00
-    psllw      xmm4, 8
-    pxor       xmm5, xmm5
-
-    // 4 pixel loop.
-    align      4
-  convertloop:
-    movdqu     xmm0, qword ptr [eax]      // generate luma ptr
-    pmaddubsw  xmm0, xmm3
-    phaddw     xmm0, xmm0
-    pand       xmm0, xmm4  // mask out low bits
-    punpcklwd  xmm0, xmm5
-    paddd      xmm0, xmm2  // add table base
-    movd       esi, xmm0
-    pshufd     xmm0, xmm0, 0x39  // 00111001 to rotate right 32
-
-    movzx      edx, byte ptr [eax]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi], dl
-    movzx      edx, byte ptr [eax + 1]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 1], dl
-    movzx      edx, byte ptr [eax + 2]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 2], dl
-    movzx      edx, byte ptr [eax + 3]  // copy alpha.
-    mov        byte ptr [edi + 3], dl
-
-    movd       esi, xmm0
-    pshufd     xmm0, xmm0, 0x39  // 00111001 to rotate right 32
-
-    movzx      edx, byte ptr [eax + 4]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 4], dl
-    movzx      edx, byte ptr [eax + 5]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 5], dl
-    movzx      edx, byte ptr [eax + 6]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 6], dl
-    movzx      edx, byte ptr [eax + 7]  // copy alpha.
-    mov        byte ptr [edi + 7], dl
-
-    movd       esi, xmm0
-    pshufd     xmm0, xmm0, 0x39  // 00111001 to rotate right 32
-
-    movzx      edx, byte ptr [eax + 8]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 8], dl
-    movzx      edx, byte ptr [eax + 9]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 9], dl
-    movzx      edx, byte ptr [eax + 10]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 10], dl
-    movzx      edx, byte ptr [eax + 11]  // copy alpha.
-    mov        byte ptr [edi + 11], dl
-
-    movd       esi, xmm0
-
-    movzx      edx, byte ptr [eax + 12]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 12], dl
-    movzx      edx, byte ptr [eax + 13]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 13], dl
-    movzx      edx, byte ptr [eax + 14]
-    movzx      edx, byte ptr [esi + edx]
-    mov        byte ptr [edi + 14], dl
-    movzx      edx, byte ptr [eax + 15]  // copy alpha.
-    mov        byte ptr [edi + 15], dl
-
-    sub        ecx, 4
-    lea        eax, [eax + 16]
-    lea        edi, [edi + 16]
-    jg         convertloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-#endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3
-
-#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm b/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm
deleted file mode 100755
index 0cb326f8e5..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm
+++ /dev/null
@@ -1,146 +0,0 @@
-;
-; Copyright 2012 The LibYuv Project Authors. All rights reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%ifdef __YASM_VERSION_ID__
-%if __YASM_VERSION_ID__ < 01020000h
-%error AVX2 is supported only by yasm 1.2.0 or later.
-%endif
-%endif
-%include "x86inc.asm"
-
-SECTION .text
-
-; cglobal numeric constants are parameters, gpr regs, mm regs
-
-; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
-
-%macro YUY2TOYROW 2-3
-cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
-%ifidn %1,YUY2
-    pcmpeqb    m2, m2, m2        ; generate mask 0x00ff00ff
-    psrlw      m2, m2, 8
-%endif
-
-    ALIGN      4
-.convertloop:
-    mov%2      m0, [src_yuy2q]
-    mov%2      m1, [src_yuy2q + mmsize]
-    lea        src_yuy2q, [src_yuy2q + mmsize * 2]
-%ifidn %1,YUY2
-    pand       m0, m0, m2   ; YUY2 even bytes are Y
-    pand       m1, m1, m2
-%else
-    psrlw      m0, m0, 8    ; UYVY odd bytes are Y
-    psrlw      m1, m1, 8
-%endif
-    packuswb   m0, m0, m1
-%if cpuflag(AVX2)
-    vpermq     m0, m0, 0xd8
-%endif
-    sub        pixd, mmsize
-    mov%2      [dst_yq], m0
-    lea        dst_yq, [dst_yq + mmsize]
-    jg         .convertloop
-    REP_RET
-%endmacro
-
-; TODO(fbarchard): Remove MMX.  Add SSSE3 pshufb version.
-INIT_MMX MMX
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_XMM SSE2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_YMM AVX2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW UYVY,a,
-
-; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
-
-%macro SplitUVRow 1-2
-cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
-    pcmpeqb    m4, m4, m4        ; generate mask 0x00ff00ff
-    psrlw      m4, m4, 8
-    sub        dst_vq, dst_uq
-
-    ALIGN      4
-.convertloop:
-    mov%1      m0, [src_uvq]
-    mov%1      m1, [src_uvq + mmsize]
-    lea        src_uvq, [src_uvq + mmsize * 2]
-    psrlw      m2, m0, 8         ; odd bytes
-    psrlw      m3, m1, 8
-    pand       m0, m0, m4        ; even bytes
-    pand       m1, m1, m4
-    packuswb   m0, m0, m1
-    packuswb   m2, m2, m3
-%if cpuflag(AVX2)
-    vpermq     m0, m0, 0xd8
-    vpermq     m2, m2, 0xd8
-%endif
-    mov%1      [dst_uq], m0
-    mov%1      [dst_uq + dst_vq], m2
-    lea        dst_uq, [dst_uq + mmsize]
-    sub        pixd, mmsize
-    jg         .convertloop
-    REP_RET
-%endmacro
-
-INIT_MMX MMX
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_XMM SSE2
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_YMM AVX2
-SplitUVRow a,
-
-; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-;                      int width);
-
-%macro MergeUVRow_ 1-2
-cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
-    sub        src_vq, src_uq
-
-    ALIGN      4
-.convertloop:
-    mov%1      m0, [src_uq]
-    mov%1      m1, [src_vq]
-    lea        src_uq, [src_uq + mmsize]
-    punpcklbw  m2, m0, m1       // first 8 UV pairs
-    punpckhbw  m0, m0, m1       // next 8 UV pairs
-%if cpuflag(AVX2)
-    vperm2i128 m1, m2, m0, 0x20  // low 128 of ymm2 and low 128 of ymm0
-    vperm2i128 m2, m2, m0, 0x31  // high 128 of ymm2 and high 128 of ymm0
-    mov%1      [dst_uvq], m1
-    mov%1      [dst_uvq + mmsize], m2
-%else
-    mov%1      [dst_uvq], m2
-    mov%1      [dst_uvq + mmsize], m0
-%endif
-    lea        dst_uvq, [dst_uvq + mmsize * 2]
-    sub        pixd, mmsize
-    jg         .convertloop
-    REP_RET
-%endmacro
-
-INIT_MMX MMX
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_XMM SSE2
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_YMM AVX2
-MergeUVRow_ a,
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc
deleted file mode 100755
index b3893cc00c..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc
+++ /dev/null
@@ -1,926 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"  // For CopyPlane
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// Remove this macro if OVERREAD is safe.
-#define AVOID_OVERREAD 1
-
-static __inline int Abs(int v) {
-  return v >= 0 ? v : -v;
-}
-
-#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
-
-// Scale plane, 1/2
-// This is an optimized version for scaling down a plane to 1/2 of
-// its original size.
-
-static void ScalePlaneDown2(int src_width, int src_height,
-                            int dst_width, int dst_height,
-                            int src_stride, int dst_stride,
-                            const uint8* src_ptr, uint8* dst_ptr,
-                            enum FilterMode filtering) {
-  int y;
-  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) =
-    filtering == kFilterNone ? ScaleRowDown2_C :
-        (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
-        ScaleRowDown2Box_C);
-  int row_stride = src_stride << 1;
-  if (!filtering) {
-    src_ptr += src_stride;  // Point to odd rows.
-    src_stride = 0;
-  }
-
-#if defined(HAS_SCALEROWDOWN2_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
-    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
-  }
-#elif defined(HAS_SCALEROWDOWN2_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
-    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
-        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
-        ScaleRowDown2Box_Unaligned_SSE2);
-    if (IS_ALIGNED(src_ptr, 16) &&
-        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
-        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
-      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
-          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
-          ScaleRowDown2Box_SSE2);
-    }
-  }
-#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
-      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
-      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
-    ScaleRowDown2 = filtering ?
-        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
-  }
-#endif
-
-  if (filtering == kFilterLinear) {
-    src_stride = 0;
-  }
-  // TODO(fbarchard): Loop through source height to allow odd height.
-  for (y = 0; y < dst_height; ++y) {
-    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
-    src_ptr += row_stride;
-    dst_ptr += dst_stride;
-  }
-}
-
-// Scale plane, 1/4
-// This is an optimized version for scaling down a plane to 1/4 of
-// its original size.
-
-static void ScalePlaneDown4(int src_width, int src_height,
-                            int dst_width, int dst_height,
-                            int src_stride, int dst_stride,
-                            const uint8* src_ptr, uint8* dst_ptr,
-                            enum FilterMode filtering) {
-  int y;
-  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) =
-      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
-  int row_stride = src_stride << 2;
-  if (!filtering) {
-    src_ptr += src_stride * 2;  // Point to row 2.
-    src_stride = 0;
-  }
-#if defined(HAS_SCALEROWDOWN4_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
-    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
-  }
-#elif defined(HAS_SCALEROWDOWN4_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
-      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
-    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
-  }
-#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
-      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
-      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
-    ScaleRowDown4 = filtering ?
-        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
-  }
-#endif
-
-  if (filtering == kFilterLinear) {
-    src_stride = 0;
-  }
-  for (y = 0; y < dst_height; ++y) {
-    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
-    src_ptr += row_stride;
-    dst_ptr += dst_stride;
-  }
-}
-
-// Scale plane down, 3/4
-
-static void ScalePlaneDown34(int src_width, int src_height,
-                             int dst_width, int dst_height,
-                             int src_stride, int dst_stride,
-                             const uint8* src_ptr, uint8* dst_ptr,
-                             enum FilterMode filtering) {
-  int y;
-  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
-  assert(dst_width % 3 == 0);
-  if (!filtering) {
-    ScaleRowDown34_0 = ScaleRowDown34_C;
-    ScaleRowDown34_1 = ScaleRowDown34_C;
-  } else {
-    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
-    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
-  }
-#if defined(HAS_SCALEROWDOWN34_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
-    if (!filtering) {
-      ScaleRowDown34_0 = ScaleRowDown34_NEON;
-      ScaleRowDown34_1 = ScaleRowDown34_NEON;
-    } else {
-      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
-      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SCALEROWDOWN34_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
-      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
-    if (!filtering) {
-      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
-      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
-    } else {
-      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
-      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
-      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
-      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
-    if (!filtering) {
-      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
-      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
-    } else {
-      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
-      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
-    }
-  }
-#endif
-
-  for (y = 0; y < dst_height - 2; y += 3) {
-    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
-    src_ptr += src_stride;
-    dst_ptr += dst_stride;
-    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
-    src_ptr += src_stride;
-    dst_ptr += dst_stride;
-    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
-                     dst_ptr, dst_width);
-    src_ptr += src_stride * 2;
-    dst_ptr += dst_stride;
-  }
-
-  // Remainder 1 or 2 rows with last row vertically unfiltered
-  if ((dst_height % 3) == 2) {
-    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
-    src_ptr += src_stride;
-    dst_ptr += dst_stride;
-    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
-  } else if ((dst_height % 3) == 1) {
-    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
-  }
-}
-
-
-// Scale plane, 3/8
-// This is an optimized version for scaling down a plane to 3/8
-// of its original size.
-//
-// Uses box filter arranges like this
-// aaabbbcc -> abc
-// aaabbbcc    def
-// aaabbbcc    ghi
-// dddeeeff
-// dddeeeff
-// dddeeeff
-// ggghhhii
-// ggghhhii
-// Boxes are 3x3, 2x3, 3x2 and 2x2
-
-static void ScalePlaneDown38(int src_width, int src_height,
-                             int dst_width, int dst_height,
-                             int src_stride, int dst_stride,
-                             const uint8* src_ptr, uint8* dst_ptr,
-                             enum FilterMode filtering) {
-  int y;
-  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
-  assert(dst_width % 3 == 0);
-  if (!filtering) {
-    ScaleRowDown38_3 = ScaleRowDown38_C;
-    ScaleRowDown38_2 = ScaleRowDown38_C;
-  } else {
-    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
-    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
-  }
-#if defined(HAS_SCALEROWDOWN38_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
-    if (!filtering) {
-      ScaleRowDown38_3 = ScaleRowDown38_NEON;
-      ScaleRowDown38_2 = ScaleRowDown38_NEON;
-    } else {
-      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
-      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
-    }
-  }
-#elif defined(HAS_SCALEROWDOWN38_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
-      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
-    if (!filtering) {
-      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
-      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
-    } else {
-      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
-      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
-    }
-  }
-#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
-      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
-      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
-    if (!filtering) {
-      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
-      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
-    } else {
-      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
-      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
-    }
-  }
-#endif
-
-  for (y = 0; y < dst_height - 2; y += 3) {
-    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
-    src_ptr += src_stride * 3;
-    dst_ptr += dst_stride;
-    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
-    src_ptr += src_stride * 3;
-    dst_ptr += dst_stride;
-    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
-    src_ptr += src_stride * 2;
-    dst_ptr += dst_stride;
-  }
-
-  // Remainder 1 or 2 rows with last row vertically unfiltered
-  if ((dst_height % 3) == 2) {
-    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
-    src_ptr += src_stride * 3;
-    dst_ptr += dst_stride;
-    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
-  } else if ((dst_height % 3) == 1) {
-    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
-  }
-}
-
-static __inline uint32 SumBox(int iboxwidth, int iboxheight,
-                              ptrdiff_t src_stride, const uint8* src_ptr) {
-  uint32 sum = 0u;
-  int y;
-  assert(iboxwidth > 0);
-  assert(iboxheight > 0);
-  for (y = 0; y < iboxheight; ++y) {
-    int x;
-    for (x = 0; x < iboxwidth; ++x) {
-      sum += src_ptr[x];
-    }
-    src_ptr += src_stride;
-  }
-  return sum;
-}
-
-static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
-                               int x, int dx, ptrdiff_t src_stride,
-                               const uint8* src_ptr, uint8* dst_ptr) {
-  int i;
-  int boxwidth;
-  for (i = 0; i < dst_width; ++i) {
-    int ix = x >> 16;
-    x += dx;
-    boxwidth = (x >> 16) - ix;
-    *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
-        (boxwidth * boxheight);
-  }
-}
-
-static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
-  uint32 sum = 0u;
-  int x;
-  assert(iboxwidth > 0);
-  for (x = 0; x < iboxwidth; ++x) {
-    sum += src_ptr[x];
-  }
-  return sum;
-}
-
-static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
-                            const uint16* src_ptr, uint8* dst_ptr) {
-  int i;
-  int scaletbl[2];
-  int minboxwidth = (dx >> 16);
-  int* scaleptr = scaletbl - minboxwidth;
-  int boxwidth;
-  scaletbl[0] = 65536 / (minboxwidth * boxheight);
-  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
-  for (i = 0; i < dst_width; ++i) {
-    int ix = x >> 16;
-    x += dx;
-    boxwidth = (x >> 16) - ix;
-    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
-  }
-}
-
-static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
-                            const uint16* src_ptr, uint8* dst_ptr) {
-  int boxwidth = (dx >> 16);
-  int scaleval = 65536 / (boxwidth * boxheight);
-  int i;
-  for (i = 0; i < dst_width; ++i) {
-    *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
-    x += boxwidth;
-  }
-}
-
-// Scale plane down to any dimensions, with interpolation.
-// (boxfilter).
-//
-// Same method as SimpleScale, which is fixed point, outputting
-// one pixel of destination using fixed point (16.16) to step
-// through source, sampling a box of pixel with simple
-// averaging.
-static void ScalePlaneBox(int src_width, int src_height,
-                          int dst_width, int dst_height,
-                          int src_stride, int dst_stride,
-                          const uint8* src_ptr, uint8* dst_ptr) {
-  int j;
-  // Initial source x/y coordinate and step values as 16.16 fixed point.
-  int x = 0;
-  int y = 0;
-  int dx = 0;
-  int dy = 0;
-  const int max_y = (src_height << 16);
-  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
-  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
-    uint8* dst = dst_ptr;
-    int j;
-    for (j = 0; j < dst_height; ++j) {
-      int boxheight;
-      int iy = y >> 16;
-      const uint8* src = src_ptr + iy * src_stride;
-      y += dy;
-      if (y > max_y) {
-        y = max_y;
-      }
-      boxheight = (y >> 16) - iy;
-      ScalePlaneBoxRow_C(dst_width, boxheight,
-                         x, dx, src_stride,
-                         src, dst);
-      dst += dst_stride;
-    }
-    return;
-  }
-  {
-    // Allocate a row buffer of uint16.
-    align_buffer_64(row16, src_width * 2);
-    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
-        const uint16* src_ptr, uint8* dst_ptr) =
-        (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
-    void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
-        uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
-
-#if defined(HAS_SCALEADDROWS_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) &&
-#ifdef AVOID_OVERREAD
-        IS_ALIGNED(src_width, 16) &&
-#endif
-        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
-      ScaleAddRows = ScaleAddRows_SSE2;
-    }
-#endif
-
-    for (j = 0; j < dst_height; ++j) {
-      int boxheight;
-      int iy = y >> 16;
-      const uint8* src = src_ptr + iy * src_stride;
-      y += dy;
-      if (y > (src_height << 16)) {
-        y = (src_height << 16);
-      }
-      boxheight = (y >> 16) - iy;
-      ScaleAddRows(src, src_stride, (uint16*)(row16),
-                 src_width, boxheight);
-      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
-                 dst_ptr);
-      dst_ptr += dst_stride;
-    }
-    free_aligned_buffer_64(row16);
-  }
-}
-
-// Scale plane down with bilinear interpolation.
-void ScalePlaneBilinearDown(int src_width, int src_height,
-                            int dst_width, int dst_height,
-                            int src_stride, int dst_stride,
-                            const uint8* src_ptr, uint8* dst_ptr,
-                            enum FilterMode filtering) {
-  // Initial source x/y coordinate and step values as 16.16 fixed point.
-  int x = 0;
-  int y = 0;
-  int dx = 0;
-  int dy = 0;
-  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
-  // Allocate a row buffer.
-  align_buffer_64(row, src_width);
-
-  const int max_y = (src_height - 1) << 16;
-  int j;
-  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
-      int dst_width, int x, int dx) =
-      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
-  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_C;
-  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(src_width, 32)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(src_width, 16)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
-    if (IS_ALIGNED(src_width, 4)) {
-      InterpolateRow = InterpolateRow_MIPS_DSPR2;
-    }
-  }
-#endif
-
-
-#if defined(HAS_SCALEFILTERCOLS_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleFilterCols = ScaleFilterCols_SSSE3;
-  }
-#endif
-  if (y > max_y) {
-    y = max_y;
-  }
-
-  for (j = 0; j < dst_height; ++j) {
-    int yi = y >> 16;
-    const uint8* src = src_ptr + yi * src_stride;
-    if (filtering == kFilterLinear) {
-      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
-    } else {
-      int yf = (y >> 8) & 255;
-      InterpolateRow(row, src, src_stride, src_width, yf);
-      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
-    }
-    dst_ptr += dst_stride;
-    y += dy;
-    if (y > max_y) {
-      y = max_y;
-    }
-  }
-  free_aligned_buffer_64(row);
-}
-
-// Scale up down with bilinear interpolation.
-void ScalePlaneBilinearUp(int src_width, int src_height,
-                          int dst_width, int dst_height,
-                          int src_stride, int dst_stride,
-                          const uint8* src_ptr, uint8* dst_ptr,
-                          enum FilterMode filtering) {
-  int j;
-  // Initial source x/y coordinate and step values as 16.16 fixed point.
-  int x = 0;
-  int y = 0;
-  int dx = 0;
-  int dy = 0;
-  const int max_y = (src_height - 1) << 16;
-  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_C;
-  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
-       int dst_width, int x, int dx) =
-       filtering ? ScaleFilterCols_C : ScaleCols_C;
-  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(dst_width, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSE2;
-      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(dst_width, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(dst_width, 32)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(dst_width, 16)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_MIPS_DSPR2;
-    }
-  }
-#endif
-
-  if (filtering && src_width >= 32768) {
-    ScaleFilterCols = ScaleFilterCols64_C;
-  }
-#if defined(HAS_SCALEFILTERCOLS_SSSE3)
-  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleFilterCols = ScaleFilterCols_SSSE3;
-  }
-#endif
-  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
-    ScaleFilterCols = ScaleColsUp2_C;
-#if defined(HAS_SCALECOLS_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
-        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
-        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
-      ScaleFilterCols = ScaleColsUp2_SSE2;
-    }
-#endif
-  }
-
-  if (y > max_y) {
-    y = max_y;
-  }
-  {
-    int yi = y >> 16;
-    const uint8* src = src_ptr + yi * src_stride;
-
-    // Allocate 2 row buffers.
-    const int kRowSize = (dst_width + 15) & ~15;
-    align_buffer_64(row, kRowSize * 2);
-
-    uint8* rowptr = row;
-    int rowstride = kRowSize;
-    int lasty = yi;
-
-    ScaleFilterCols(rowptr, src, dst_width, x, dx);
-    if (src_height > 1) {
-      src += src_stride;
-    }
-    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
-    src += src_stride;
-
-    for (j = 0; j < dst_height; ++j) {
-      yi = y >> 16;
-      if (yi != lasty) {
-        if (y > max_y) {
-          y = max_y;
-          yi = y >> 16;
-          src = src_ptr + yi * src_stride;
-        }
-        if (yi != lasty) {
-          ScaleFilterCols(rowptr, src, dst_width, x, dx);
-          rowptr += rowstride;
-          rowstride = -rowstride;
-          lasty = yi;
-          src += src_stride;
-        }
-      }
-      if (filtering == kFilterLinear) {
-        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
-      } else {
-        int yf = (y >> 8) & 255;
-        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
-      }
-      dst_ptr += dst_stride;
-      y += dy;
-    }
-    free_aligned_buffer_64(row);
-  }
-}
-
-// Scale Plane to/from any dimensions, without interpolation.
-// Fixed point math is used for performance: The upper 16 bits
-// of x and dx is the integer part of the source position and
-// the lower 16 bits are the fixed decimal part.
-
-static void ScalePlaneSimple(int src_width, int src_height,
-                             int dst_width, int dst_height,
-                             int src_stride, int dst_stride,
-                             const uint8* src_ptr, uint8* dst_ptr) {
-  int i;
-  void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
-      int dst_width, int x, int dx) = ScaleCols_C;
-  // Initial source x/y coordinate and step values as 16.16 fixed point.
-  int x = 0;
-  int y = 0;
-  int dx = 0;
-  int dy = 0;
-  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-
-  if (src_width * 2 == dst_width && x < 0x8000) {
-    ScaleCols = ScaleColsUp2_C;
-#if defined(HAS_SCALECOLS_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
-        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
-        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
-      ScaleCols = ScaleColsUp2_SSE2;
-    }
-#endif
-  }
-
-  for (i = 0; i < dst_height; ++i) {
-    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
-              dst_width, x, dx);
-    dst_ptr += dst_stride;
-    y += dy;
-  }
-}
-
-// Scale a plane.
-// This function dispatches to a specialized scaler based on scale factor.
-
-LIBYUV_API
-void ScalePlane(const uint8* src, int src_stride,
-                int src_width, int src_height,
-                uint8* dst, int dst_stride,
-                int dst_width, int dst_height,
-                enum FilterMode filtering) {
-  // Simplify filtering when possible.
-  filtering = ScaleFilterReduce(src_width, src_height,
-                                dst_width, dst_height,
-                                filtering);
-
-  // Negative height means invert the image.
-  if (src_height < 0) {
-    src_height = -src_height;
-    src = src + (src_height - 1) * src_stride;
-    src_stride = -src_stride;
-  }
-
-  // Use specialized scales to improve performance for common resolutions.
-  // For example, all the 1/2 scalings will use ScalePlaneDown2()
-  if (dst_width == src_width && dst_height == src_height) {
-    // Straight copy.
-    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
-    return;
-  }
-  if (dst_width == src_width) {
-    int dy = FixedDiv(src_height, dst_height);
-    // Arbitrary scale vertically, but unscaled vertically.
-    ScalePlaneVertical(src_height,
-                       dst_width, dst_height,
-                       src_stride, dst_stride, src, dst,
-                       0, 0, dy, 1, filtering);
-    return;
-  }
-  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
-    // Scale down.
-    if (4 * dst_width == 3 * src_width &&
-        4 * dst_height == 3 * src_height) {
-      // optimized, 3/4
-      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
-                       src_stride, dst_stride, src, dst, filtering);
-      return;
-    }
-    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
-      // optimized, 1/2
-      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
-                      src_stride, dst_stride, src, dst, filtering);
-      return;
-    }
-    // 3/8 rounded up for odd sized chroma height.
-    if (8 * dst_width == 3 * src_width &&
-        dst_height == ((src_height * 3 + 7) / 8)) {
-      // optimized, 3/8
-      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
-                       src_stride, dst_stride, src, dst, filtering);
-      return;
-    }
-    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
-               filtering != kFilterBilinear) {
-      // optimized, 1/4
-      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
-                      src_stride, dst_stride, src, dst, filtering);
-      return;
-    }
-  }
-  if (filtering == kFilterBox && dst_height * 2 < src_height) {
-    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
-                  src_stride, dst_stride, src, dst);
-    return;
-  }
-  if (filtering && dst_height > src_height) {
-    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
-                         src_stride, dst_stride, src, dst, filtering);
-    return;
-  }
-  if (filtering) {
-    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
-                           src_stride, dst_stride, src, dst, filtering);
-    return;
-  }
-  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
-                   src_stride, dst_stride, src, dst);
-}
-
-// Scale an I420 image.
-// This function in turn calls a scaling function for each plane.
-
-LIBYUV_API
-int I420Scale(const uint8* src_y, int src_stride_y,
-              const uint8* src_u, int src_stride_u,
-              const uint8* src_v, int src_stride_v,
-              int src_width, int src_height,
-              uint8* dst_y, int dst_stride_y,
-              uint8* dst_u, int dst_stride_u,
-              uint8* dst_v, int dst_stride_v,
-              int dst_width, int dst_height,
-              enum FilterMode filtering) {
-  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
-  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
-  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
-  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
-  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
-      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
-    return -1;
-  }
-
-  ScalePlane(src_y, src_stride_y, src_width, src_height,
-             dst_y, dst_stride_y, dst_width, dst_height,
-             filtering);
-  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
-             dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
-             filtering);
-  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
-             dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
-             filtering);
-  return 0;
-}
-
-// Deprecated api
-LIBYUV_API
-int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
-          int src_stride_y, int src_stride_u, int src_stride_v,
-          int src_width, int src_height,
-          uint8* dst_y, uint8* dst_u, uint8* dst_v,
-          int dst_stride_y, int dst_stride_u, int dst_stride_v,
-          int dst_width, int dst_height,
-          LIBYUV_BOOL interpolate) {
-  return I420Scale(src_y, src_stride_y,
-                   src_u, src_stride_u,
-                   src_v, src_stride_v,
-                   src_width, src_height,
-                   dst_y, dst_stride_y,
-                   dst_u, dst_stride_u,
-                   dst_v, dst_stride_v,
-                   dst_width, dst_height,
-                   interpolate ? kFilterBox : kFilterNone);
-}
-
-// Deprecated api
-LIBYUV_API
-int ScaleOffset(const uint8* src, int src_width, int src_height,
-                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
-                LIBYUV_BOOL interpolate) {
-  // Chroma requires offset to multiple of 2.
-  int dst_yoffset_even = dst_yoffset & ~1;
-  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
-  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
-  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
-  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
-  int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
-  const uint8* src_y = src;
-  const uint8* src_u = src + src_width * src_height;
-  const uint8* src_v = src + src_width * src_height +
-                             src_halfwidth * src_halfheight;
-  uint8* dst_y = dst + dst_yoffset_even * dst_width;
-  uint8* dst_u = dst + dst_width * dst_height +
-                 (dst_yoffset_even >> 1) * dst_halfwidth;
-  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
-                 (dst_yoffset_even >> 1) * dst_halfwidth;
-  if (!src || src_width <= 0 || src_height <= 0 ||
-      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
-      dst_yoffset_even >= dst_height) {
-    return -1;
-  }
-  return I420Scale(src_y, src_width,
-                   src_u, src_halfwidth,
-                   src_v, src_halfwidth,
-                   src_width, src_height,
-                   dst_y, dst_width,
-                   dst_u, dst_halfwidth,
-                   dst_v, dst_halfwidth,
-                   dst_width, aheight,
-                   interpolate ? kFilterBox : kFilterNone);
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc
deleted file mode 100755
index e339cd7c79..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc
+++ /dev/null
@@ -1,809 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"  // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-static __inline int Abs(int v) {
-  return v >= 0 ? v : -v;
-}
-
-// ScaleARGB ARGB, 1/2
-// This is an optimized version for scaling down a ARGB to 1/2 of
-// its original size.
-static void ScaleARGBDown2(int src_width, int src_height,
-                           int dst_width, int dst_height,
-                           int src_stride, int dst_stride,
-                           const uint8* src_argb, uint8* dst_argb,
-                           int x, int dx, int y, int dy,
-                           enum FilterMode filtering) {
-  int j;
-  int row_stride = src_stride * (dy >> 16);
-  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
-                            uint8* dst_argb, int dst_width) =
-    filtering == kFilterNone ? ScaleARGBRowDown2_C :
-        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
-        ScaleARGBRowDown2Box_C);
-  assert(dx == 65536 * 2);  // Test scale factor of 2.
-  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
-  // Advance to odd row, even column.
-  if (filtering == kFilterBilinear) {
-    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
-  } else {
-    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
-  }
-
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
-        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
-        ScaleARGBRowDown2Box_SSE2);
-  }
-#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
-      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
-    ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
-        ScaleARGBRowDown2_NEON;
-  }
-#endif
-
-  if (filtering == kFilterLinear) {
-    src_stride = 0;
-  }
-  for (j = 0; j < dst_height; ++j) {
-    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
-    src_argb += row_stride;
-    dst_argb += dst_stride;
-  }
-}
-
-// ScaleARGB ARGB, 1/4
-// This is an optimized version for scaling down a ARGB to 1/4 of
-// its original size.
-static void ScaleARGBDown4Box(int src_width, int src_height,
-                              int dst_width, int dst_height,
-                              int src_stride, int dst_stride,
-                              const uint8* src_argb, uint8* dst_argb,
-                              int x, int dx, int y, int dy) {
-  int j;
-  // Allocate 2 rows of ARGB.
-  const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
-  align_buffer_64(row, kRowSize * 2);
-  int row_stride = src_stride * (dy >> 16);
-  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
-    uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
-  // Advance to odd row, even column.
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
-  assert(dx == 65536 * 4);  // Test scale factor of 4.
-  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
-  }
-#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
-      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
-    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
-  }
-#endif
-  for (j = 0; j < dst_height; ++j) {
-    ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
-    ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
-                      row + kRowSize, dst_width * 2);
-    ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
-    src_argb += row_stride;
-    dst_argb += dst_stride;
-  }
-  free_aligned_buffer_64(row);
-}
-
-// ScaleARGB ARGB Even
-// This is an optimized version for scaling down a ARGB to even
-// multiple of its original size.
-static void ScaleARGBDownEven(int src_width, int src_height,
-                              int dst_width, int dst_height,
-                              int src_stride, int dst_stride,
-                              const uint8* src_argb, uint8* dst_argb,
-                              int x, int dx, int y, int dy,
-                              enum FilterMode filtering) {
-  int j;
-  int col_step = dx >> 16;
-  int row_stride = (dy >> 16) * src_stride;
-  void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
-                               int src_step, uint8* dst_argb, int dst_width) =
-      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
-  assert(IS_ALIGNED(src_width, 2));
-  assert(IS_ALIGNED(src_height, 2));
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
-#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
-        ScaleARGBRowDownEven_SSE2;
-  }
-#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
-      IS_ALIGNED(src_argb, 4)) {
-    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
-        ScaleARGBRowDownEven_NEON;
-  }
-#endif
-
-  if (filtering == kFilterLinear) {
-    src_stride = 0;
-  }
-  for (j = 0; j < dst_height; ++j) {
-    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
-    src_argb += row_stride;
-    dst_argb += dst_stride;
-  }
-}
-
-// Scale ARGB down with bilinear interpolation.
-static void ScaleARGBBilinearDown(int src_width, int src_height,
-                                  int dst_width, int dst_height,
-                                  int src_stride, int dst_stride,
-                                  const uint8* src_argb, uint8* dst_argb,
-                                  int x, int dx, int y, int dy,
-                                  enum FilterMode filtering) {
-  int j;
-  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_C;
-  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
-      int dst_width, int x, int dx) =
-      (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
-  int64 xlast = x + (int64)(dst_width - 1) * dx;
-  int64 xl = (dx >= 0) ? x : xlast;
-  int64 xr = (dx >= 0) ? xlast : x;
-  int clip_src_width;
-  xl = (xl >> 16) & ~3;  // Left edge aligned.
-  xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
-  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
-  if (xr > src_width) {
-    xr = src_width;
-  }
-  clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
-  src_argb += xl * 4;
-  x -= (int)(xl << 16);
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(clip_src_width, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(clip_src_width, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(clip_src_width, 32)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(clip_src_width, 16)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
-      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
-    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
-    if (IS_ALIGNED(clip_src_width, 4)) {
-      InterpolateRow = InterpolateRow_MIPS_DSPR2;
-    }
-  }
-#endif
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
-  }
-#endif
-  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
-  // Allocate a row of ARGB.
-  {
-    align_buffer_64(row, clip_src_width * 4);
-
-    const int max_y = (src_height - 1) << 16;
-    if (y > max_y) {
-      y = max_y;
-    }
-    for (j = 0; j < dst_height; ++j) {
-      int yi = y >> 16;
-      const uint8* src = src_argb + yi * src_stride;
-      if (filtering == kFilterLinear) {
-        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
-      } else {
-        int yf = (y >> 8) & 255;
-        InterpolateRow(row, src, src_stride, clip_src_width, yf);
-        ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
-      }
-      dst_argb += dst_stride;
-      y += dy;
-      if (y > max_y) {
-        y = max_y;
-      }
-    }
-    free_aligned_buffer_64(row);
-  }
-}
-
-// Scale ARGB up with bilinear interpolation.
-static void ScaleARGBBilinearUp(int src_width, int src_height,
-                                int dst_width, int dst_height,
-                                int src_stride, int dst_stride,
-                                const uint8* src_argb, uint8* dst_argb,
-                                int x, int dx, int y, int dy,
-                                enum FilterMode filtering) {
-  int j;
-  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_C;
-  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
-      int dst_width, int x, int dx) =
-      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
-  const int max_y = (src_height - 1) << 16;
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSE2;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(dst_width, 8)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
-    InterpolateRow = InterpolateRow_MIPS_DSPR2;
-  }
-#endif
-  if (src_width >= 32768) {
-    ScaleARGBFilterCols = filtering ?
-        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
-  }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
-  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
-  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
-  }
-#endif
-  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
-    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
-        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
-        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
-    }
-#endif
-  }
-
-  if (y > max_y) {
-    y = max_y;
-  }
-
-  {
-    int yi = y >> 16;
-    const uint8* src = src_argb + yi * src_stride;
-
-    // Allocate 2 rows of ARGB.
-    const int kRowSize = (dst_width * 4 + 15) & ~15;
-    align_buffer_64(row, kRowSize * 2);
-
-    uint8* rowptr = row;
-    int rowstride = kRowSize;
-    int lasty = yi;
-
-    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
-    if (src_height > 1) {
-      src += src_stride;
-    }
-    ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
-    src += src_stride;
-
-    for (j = 0; j < dst_height; ++j) {
-      yi = y >> 16;
-      if (yi != lasty) {
-        if (y > max_y) {
-          y = max_y;
-          yi = y >> 16;
-          src = src_argb + yi * src_stride;
-        }
-        if (yi != lasty) {
-          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
-          rowptr += rowstride;
-          rowstride = -rowstride;
-          lasty = yi;
-          src += src_stride;
-        }
-      }
-      if (filtering == kFilterLinear) {
-        InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
-      } else {
-        int yf = (y >> 8) & 255;
-        InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
-      }
-      dst_argb += dst_stride;
-      y += dy;
-    }
-    free_aligned_buffer_64(row);
-  }
-}
-
-#ifdef YUVSCALEUP
-// Scale YUV to ARGB up with bilinear interpolation.
-static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
-                                     int dst_width, int dst_height,
-                                     int src_stride_y,
-                                     int src_stride_u,
-                                     int src_stride_v,
-                                     int dst_stride_argb,
-                                     const uint8* src_y,
-                                     const uint8* src_u,
-                                     const uint8* src_v,
-                                     uint8* dst_argb,
-                                     int x, int dx, int y, int dy,
-                                     enum FilterMode filtering) {
-  int j;
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToARGBRow_C;
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(src_width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        I422ToARGBRow = I422ToARGBRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(src_width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(src_width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_C;
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSE2;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        InterpolateRow = InterpolateRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-        InterpolateRow = InterpolateRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(dst_width, 8)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(dst_width, 4)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    InterpolateRow = InterpolateRow_MIPS_DSPR2;
-  }
-#endif
-
-  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
-      int dst_width, int x, int dx) =
-      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
-  if (src_width >= 32768) {
-    ScaleARGBFilterCols = filtering ?
-        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
-  }
-#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
-  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
-  }
-#endif
-#if defined(HAS_SCALEARGBCOLS_SSE2)
-  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
-    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
-  }
-#endif
-  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
-    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
-        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
-        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
-    }
-#endif
-  }
-
-  const int max_y = (src_height - 1) << 16;
-  if (y > max_y) {
-    y = max_y;
-  }
-  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
-  int yi = y >> 16;
-  int uv_yi = yi >> kYShift;
-  const uint8* src_row_y = src_y + yi * src_stride_y;
-  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
-  const uint8* src_row_v = src_v + uv_yi * src_stride_v;
-
-  // Allocate 2 rows of ARGB.
-  const int kRowSize = (dst_width * 4 + 15) & ~15;
-  align_buffer_64(row, kRowSize * 2);
-
-  // Allocate 1 row of ARGB for source conversion.
-  align_buffer_64(argb_row, src_width * 4);
-
-  uint8* rowptr = row;
-  int rowstride = kRowSize;
-  int lasty = yi;
-
-  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
-  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
-  if (src_height > 1) {
-    src_row_y += src_stride_y;
-    if (yi & 1) {
-      src_row_u += src_stride_u;
-      src_row_v += src_stride_v;
-    }
-  }
-  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
-  if (src_height > 2) {
-    src_row_y += src_stride_y;
-    if (!(yi & 1)) {
-      src_row_u += src_stride_u;
-      src_row_v += src_stride_v;
-    }
-  }
-
-  for (j = 0; j < dst_height; ++j) {
-    yi = y >> 16;
-    if (yi != lasty) {
-      if (y > max_y) {
-        y = max_y;
-        yi = y >> 16;
-        uv_yi = yi >> kYShift;
-        src_row_y = src_y + yi * src_stride_y;
-        src_row_u = src_u + uv_yi * src_stride_u;
-        src_row_v = src_v + uv_yi * src_stride_v;
-      }
-      if (yi != lasty) {
-        // TODO(fbarchard): Convert the clipped region of row.
-        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
-        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
-        rowptr += rowstride;
-        rowstride = -rowstride;
-        lasty = yi;
-        src_row_y += src_stride_y;
-        if (yi & 1) {
-          src_row_u += src_stride_u;
-          src_row_v += src_stride_v;
-        }
-      }
-    }
-    if (filtering == kFilterLinear) {
-      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
-    } else {
-      int yf = (y >> 8) & 255;
-      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
-    }
-    dst_argb += dst_stride_argb;
-    y += dy;
-  }
-  free_aligned_buffer_64(row);
-  free_aligned_buffer_64(row_argb);
-}
-#endif
-
-// Scale ARGB to/from any dimensions, without interpolation.
-// Fixed point math is used for performance: The upper 16 bits
-// of x and dx is the integer part of the source position and
-// the lower 16 bits are the fixed decimal part.
-
-static void ScaleARGBSimple(int src_width, int src_height,
-                            int dst_width, int dst_height,
-                            int src_stride, int dst_stride,
-                            const uint8* src_argb, uint8* dst_argb,
-                            int x, int dx, int y, int dy) {
-  int j;
-  void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
-      int dst_width, int x, int dx) =
-      (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
-#if defined(HAS_SCALEARGBCOLS_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
-    ScaleARGBCols = ScaleARGBCols_SSE2;
-  }
-#endif
-  if (src_width * 2 == dst_width && x < 0x8000) {
-    ScaleARGBCols = ScaleARGBColsUp2_C;
-#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
-    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
-        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
-        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-      ScaleARGBCols = ScaleARGBColsUp2_SSE2;
-    }
-#endif
-  }
-
-  for (j = 0; j < dst_height; ++j) {
-    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
-                  dst_width, x, dx);
-    dst_argb += dst_stride;
-    y += dy;
-  }
-}
-
-// ScaleARGB a ARGB.
-// This function in turn calls a scaling function
-// suitable for handling the desired resolutions.
-static void ScaleARGB(const uint8* src, int src_stride,
-                      int src_width, int src_height,
-                      uint8* dst, int dst_stride,
-                      int dst_width, int dst_height,
-                      int clip_x, int clip_y, int clip_width, int clip_height,
-                      enum FilterMode filtering) {
-  // Initial source x/y coordinate and step values as 16.16 fixed point.
-  int x = 0;
-  int y = 0;
-  int dx = 0;
-  int dy = 0;
-  // ARGB does not support box filter yet, but allow the user to pass it.
-  // Simplify filtering when possible.
-  filtering = ScaleFilterReduce(src_width, src_height,
-                                dst_width, dst_height,
-                                filtering);
-
-  // Negative src_height means invert the image.
-  if (src_height < 0) {
-    src_height = -src_height;
-    src = src + (src_height - 1) * src_stride;
-    src_stride = -src_stride;
-  }
-  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
-             &x, &y, &dx, &dy);
-  src_width = Abs(src_width);
-  if (clip_x) {
-    int64 clipf = (int64)(clip_x) * dx;
-    x += (clipf & 0xffff);
-    src += (clipf >> 16) * 4;
-    dst += clip_x * 4;
-  }
-  if (clip_y) {
-    int64 clipf = (int64)(clip_y) * dy;
-    y += (clipf & 0xffff);
-    src += (clipf >> 16) * src_stride;
-    dst += clip_y * dst_stride;
-  }
-
-  // Special case for integer step values.
-  if (((dx | dy) & 0xffff) == 0) {
-    if (!dx || !dy) {  // 1 pixel wide and/or tall.
-      filtering = kFilterNone;
-    } else {
-      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
-      if (!(dx & 0x10000) && !(dy & 0x10000)) {
-        if (dx == 0x20000) {
-          // Optimized 1/2 downsample.
-          ScaleARGBDown2(src_width, src_height,
-                         clip_width, clip_height,
-                         src_stride, dst_stride, src, dst,
-                         x, dx, y, dy, filtering);
-          return;
-        }
-        if (dx == 0x40000 && filtering == kFilterBox) {
-          // Optimized 1/4 box downsample.
-          ScaleARGBDown4Box(src_width, src_height,
-                            clip_width, clip_height,
-                            src_stride, dst_stride, src, dst,
-                            x, dx, y, dy);
-          return;
-        }
-        ScaleARGBDownEven(src_width, src_height,
-                          clip_width, clip_height,
-                          src_stride, dst_stride, src, dst,
-                          x, dx, y, dy, filtering);
-        return;
-      }
-      // Optimized odd scale down. ie 3, 5, 7, 9x.
-      if ((dx & 0x10000) && (dy & 0x10000)) {
-        filtering = kFilterNone;
-        if (dx == 0x10000 && dy == 0x10000) {
-          // Straight copy.
-          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
-                   dst, dst_stride, clip_width, clip_height);
-          return;
-        }
-      }
-    }
-  }
-  if (dx == 0x10000 && (x & 0xffff) == 0) {
-    // Arbitrary scale vertically, but unscaled vertically.
-    ScalePlaneVertical(src_height,
-                       clip_width, clip_height,
-                       src_stride, dst_stride, src, dst,
-                       x, y, dy, 4, filtering);
-    return;
-  }
-  if (filtering && dy < 65536) {
-    ScaleARGBBilinearUp(src_width, src_height,
-                        clip_width, clip_height,
-                        src_stride, dst_stride, src, dst,
-                        x, dx, y, dy, filtering);
-    return;
-  }
-  if (filtering) {
-    ScaleARGBBilinearDown(src_width, src_height,
-                          clip_width, clip_height,
-                          src_stride, dst_stride, src, dst,
-                          x, dx, y, dy, filtering);
-    return;
-  }
-  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
-                  src_stride, dst_stride, src, dst,
-                  x, dx, y, dy);
-}
-
-LIBYUV_API
-int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
-                  int src_width, int src_height,
-                  uint8* dst_argb, int dst_stride_argb,
-                  int dst_width, int dst_height,
-                  int clip_x, int clip_y, int clip_width, int clip_height,
-                  enum FilterMode filtering) {
-  if (!src_argb || src_width == 0 || src_height == 0 ||
-      !dst_argb || dst_width <= 0 || dst_height <= 0 ||
-      clip_x < 0 || clip_y < 0 ||
-      (clip_x + clip_width) > dst_width ||
-      (clip_y + clip_height) > dst_height) {
-    return -1;
-  }
-  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
-            dst_argb, dst_stride_argb, dst_width, dst_height,
-            clip_x, clip_y, clip_width, clip_height, filtering);
-  return 0;
-}
-
-// Scale an ARGB image.
-LIBYUV_API
-int ARGBScale(const uint8* src_argb, int src_stride_argb,
-              int src_width, int src_height,
-              uint8* dst_argb, int dst_stride_argb,
-              int dst_width, int dst_height,
-              enum FilterMode filtering) {
-  if (!src_argb || src_width == 0 || src_height == 0 ||
-      !dst_argb || dst_width <= 0 || dst_height <= 0) {
-    return -1;
-  }
-  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
-            dst_argb, dst_stride_argb, dst_width, dst_height,
-            0, 0, dst_width, dst_height, filtering);
-  return 0;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc
deleted file mode 100755
index c0b5433239..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
-                            uint8* dst, int dst_width) {
-  asm volatile (
-#ifdef _ANDROID
-	".fpu neon\n"
-#endif
-  "1:                                          \n"
-    // load even pixels into q0, odd into q1
-    "vld2.32    {q0, q1}, [%0]!                \n"
-    "vld2.32    {q2, q3}, [%0]!                \n"
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop
-    "vst1.8     {q1}, [%1]!                    \n"  // store odd pixels
-    "vst1.8     {q3}, [%1]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst),              // %1
-    "+r"(dst_width)         // %2
-  :
-  : "memory", "cc", "q0", "q1", "q2", "q3"  // Clobber List
-  );
-}
-
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width) {
-  asm volatile (
-    // change the stride to row 2 pointer
-    "add        %1, %1, %0                     \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
-    "vpaddl.u8  q3, q3                         \n"  // A 16 bytes -> 8 shorts.
-    "vld4.8     {d16, d18, d20, d22}, [%1]!    \n"  // load 8 more ARGB pixels.
-    "vld4.8     {d17, d19, d21, d23}, [%1]!    \n"  // load last 8 ARGB pixels.
-    "vpadal.u8  q0, q8                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q9                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q2, q10                        \n"  // R 16 bytes -> 8 shorts.
-    "vpadal.u8  q3, q11                        \n"  // A 16 bytes -> 8 shorts.
-    "vrshrn.u16 d0, q0, #2                     \n"  // downshift, round and pack
-    "vrshrn.u16 d1, q1, #2                     \n"
-    "vrshrn.u16 d2, q2, #2                     \n"
-    "vrshrn.u16 d3, q3, #2                     \n"
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(src_stride),       // %1
-    "+r"(dst),              // %2
-    "+r"(dst_width)         // %3
-  :
-  : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
-  );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t, int src_stepx,
-                               uint8* dst_argb, int dst_width) {
-  asm volatile (
-    "mov        r12, %3, lsl #2                \n"
-    ".p2align  2                               \n"
-  "1:                                          \n"
-    "vld1.32    {d0[0]}, [%0], r12             \n"
-    "vld1.32    {d0[1]}, [%0], r12             \n"
-    "vld1.32    {d1[0]}, [%0], r12             \n"
-    "vld1.32    {d1[1]}, [%0], r12             \n"
-    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
-    "vst1.8     {q0}, [%1]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+r"(dst_width)    // %2
-  : "r"(src_stepx)     // %3
-  : "memory", "cc", "r12", "q0"
-  );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
-                                  int src_stepx,
-                                  uint8* dst_argb, int dst_width) {
-  asm volatile (
-    "mov       r12, %4, lsl #2                 \n"
-    "add       %1, %1, %0                      \n"
-    ".p2align  2                               \n"
-  "1:                                          \n"
-    "vld1.8    {d0}, [%0], r12                 \n"  // Read 4 2x2 blocks -> 2x1
-    "vld1.8    {d1}, [%1], r12                 \n"
-    "vld1.8    {d2}, [%0], r12                 \n"
-    "vld1.8    {d3}, [%1], r12                 \n"
-    "vld1.8    {d4}, [%0], r12                 \n"
-    "vld1.8    {d5}, [%1], r12                 \n"
-    "vld1.8    {d6}, [%0], r12                 \n"
-    "vld1.8    {d7}, [%1], r12                 \n"
-    "vaddl.u8  q0, d0, d1                      \n"
-    "vaddl.u8  q1, d2, d3                      \n"
-    "vaddl.u8  q2, d4, d5                      \n"
-    "vaddl.u8  q3, d6, d7                      \n"
-    "vswp.8    d1, d2                          \n"  // ab_cd -> ac_bd
-    "vswp.8    d5, d6                          \n"  // ef_gh -> eg_fh
-    "vadd.u16  q0, q0, q1                      \n"  // (a+b)_(c+d)
-    "vadd.u16  q2, q2, q3                      \n"  // (e+f)_(g+h)
-    "vrshrn.u16 d0, q0, #2                     \n"  // first 2 pixels.
-    "vrshrn.u16 d1, q2, #2                     \n"  // next 2 pixels.
-    "subs       %3, %3, #4                     \n"  // 4 pixels per loop.
-    "vst1.8     {q0}, [%2]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_argb),    // %0
-    "+r"(src_stride),  // %1
-    "+r"(dst_argb),    // %2
-    "+r"(dst_width)    // %3
-  : "r"(src_stepx)     // %4
-  : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
-  );
-}
-#endif  // __ARM_NEON__
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc
deleted file mode 100644
index 6ed8bfaf97..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc
+++ /dev/null
@@ -1,772 +0,0 @@
-/*
- *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/scale.h"
-
-#include <assert.h>
-#include <string.h>
-
-#include "libyuv/cpu_id.h"
-#include "libyuv/planar_functions.h"  // For CopyARGB
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-static __inline int Abs(int v) {
-  return v >= 0 ? v : -v;
-}
-
-// CPU agnostic row functions
-void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                     uint8* dst, int dst_width) {
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = src_ptr[1];
-    dst[1] = src_ptr[3];
-    dst += 2;
-    src_ptr += 4;
-  }
-  if (dst_width & 1) {
-    dst[0] = src_ptr[1];
-  }
-}
-
-void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst, int dst_width) {
-  const uint8* s = src_ptr;
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = (s[0] + s[1] + 1) >> 1;
-    dst[1] = (s[2] + s[3] + 1) >> 1;
-    dst += 2;
-    s += 4;
-  }
-  if (dst_width & 1) {
-    dst[0] = (s[0] + s[1] + 1) >> 1;
-  }
-}
-
-void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst, int dst_width) {
-  const uint8* s = src_ptr;
-  const uint8* t = src_ptr + src_stride;
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
-    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
-    dst += 2;
-    s += 4;
-    t += 4;
-  }
-  if (dst_width & 1) {
-    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
-  }
-}
-
-void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                     uint8* dst, int dst_width) {
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = src_ptr[2];
-    dst[1] = src_ptr[6];
-    dst += 2;
-    src_ptr += 8;
-  }
-  if (dst_width & 1) {
-    dst[0] = src_ptr[2];
-  }
-}
-
-void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst, int dst_width) {
-  intptr_t stride = src_stride;
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
-             src_ptr[stride + 0] + src_ptr[stride + 1] +
-             src_ptr[stride + 2] + src_ptr[stride + 3] +
-             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
-             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
-             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
-             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
-             8) >> 4;
-    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
-             src_ptr[stride + 4] + src_ptr[stride + 5] +
-             src_ptr[stride + 6] + src_ptr[stride + 7] +
-             src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
-             src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
-             src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
-             src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
-             8) >> 4;
-    dst += 2;
-    src_ptr += 8;
-  }
-  if (dst_width & 1) {
-    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
-             src_ptr[stride + 0] + src_ptr[stride + 1] +
-             src_ptr[stride + 2] + src_ptr[stride + 3] +
-             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
-             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
-             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
-             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
-             8) >> 4;
-  }
-}
-
-void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                      uint8* dst, int dst_width) {
-  int x;
-  assert((dst_width % 3 == 0) && (dst_width > 0));
-  for (x = 0; x < dst_width; x += 3) {
-    dst[0] = src_ptr[0];
-    dst[1] = src_ptr[1];
-    dst[2] = src_ptr[3];
-    dst += 3;
-    src_ptr += 4;
-  }
-}
-
-// Filter rows 0 and 1 together, 3 : 1
-void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* d, int dst_width) {
-  const uint8* s = src_ptr;
-  const uint8* t = src_ptr + src_stride;
-  int x;
-  assert((dst_width % 3 == 0) && (dst_width > 0));
-  for (x = 0; x < dst_width; x += 3) {
-    uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
-    uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
-    uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
-    uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
-    uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
-    uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
-    d[0] = (a0 * 3 + b0 + 2) >> 2;
-    d[1] = (a1 * 3 + b1 + 2) >> 2;
-    d[2] = (a2 * 3 + b2 + 2) >> 2;
-    d += 3;
-    s += 4;
-    t += 4;
-  }
-}
-
-// Filter rows 1 and 2 together, 1 : 1
-void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* d, int dst_width) {
-  const uint8* s = src_ptr;
-  const uint8* t = src_ptr + src_stride;
-  int x;
-  assert((dst_width % 3 == 0) && (dst_width > 0));
-  for (x = 0; x < dst_width; x += 3) {
-    uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
-    uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
-    uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
-    uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
-    uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
-    uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
-    d[0] = (a0 + b0 + 1) >> 1;
-    d[1] = (a1 + b1 + 1) >> 1;
-    d[2] = (a2 + b2 + 1) >> 1;
-    d += 3;
-    s += 4;
-    t += 4;
-  }
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
-                 int dst_width, int x, int dx) {
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    dst_ptr[0] = src_ptr[x >> 16];
-    x += dx;
-    dst_ptr[1] = src_ptr[x >> 16];
-    x += dx;
-    dst_ptr += 2;
-  }
-  if (dst_width & 1) {
-    dst_ptr[0] = src_ptr[x >> 16];
-  }
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
-                    int dst_width, int x, int dx) {
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
-    src_ptr += 1;
-    dst_ptr += 2;
-  }
-  if (dst_width & 1) {
-    dst_ptr[0] = src_ptr[0];
-  }
-}
-
-// (1-f)a + fb can be replaced with a + f(b-a)
-#define BLENDER(a, b, f) (uint8)((int)(a) + \
-    ((int)(f) * ((int)(b) - (int)(a)) >> 16))
-
-void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
-                       int dst_width, int x, int dx) {
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    int xi = x >> 16;
-    int a = src_ptr[xi];
-    int b = src_ptr[xi + 1];
-    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
-    x += dx;
-    xi = x >> 16;
-    a = src_ptr[xi];
-    b = src_ptr[xi + 1];
-    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
-    x += dx;
-    dst_ptr += 2;
-  }
-  if (dst_width & 1) {
-    int xi = x >> 16;
-    int a = src_ptr[xi];
-    int b = src_ptr[xi + 1];
-    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
-  }
-}
-
-void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
-                         int dst_width, int x32, int dx) {
-  int64 x = (int64)(x32);
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    int64 xi = x >> 16;
-    int a = src_ptr[xi];
-    int b = src_ptr[xi + 1];
-    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
-    x += dx;
-    xi = x >> 16;
-    a = src_ptr[xi];
-    b = src_ptr[xi + 1];
-    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
-    x += dx;
-    dst_ptr += 2;
-  }
-  if (dst_width & 1) {
-    int64 xi = x >> 16;
-    int a = src_ptr[xi];
-    int b = src_ptr[xi + 1];
-    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
-  }
-}
-#undef BLENDER
-
-void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                      uint8* dst, int dst_width) {
-  int x;
-  assert(dst_width % 3 == 0);
-  for (x = 0; x < dst_width; x += 3) {
-    dst[0] = src_ptr[0];
-    dst[1] = src_ptr[3];
-    dst[2] = src_ptr[6];
-    dst += 3;
-    src_ptr += 8;
-  }
-}
-
-// 8x3 -> 3x1
-void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
-                            ptrdiff_t src_stride,
-                            uint8* dst_ptr, int dst_width) {
-  intptr_t stride = src_stride;
-  int i;
-  assert((dst_width % 3 == 0) && (dst_width > 0));
-  for (i = 0; i < dst_width; i += 3) {
-    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
-        src_ptr[stride + 0] + src_ptr[stride + 1] +
-        src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
-        src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
-        (65536 / 9) >> 16;
-    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
-        src_ptr[stride + 3] + src_ptr[stride + 4] +
-        src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
-        src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
-        (65536 / 9) >> 16;
-    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
-        src_ptr[stride + 6] + src_ptr[stride + 7] +
-        src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
-        (65536 / 6) >> 16;
-    src_ptr += 8;
-    dst_ptr += 3;
-  }
-}
-
-// 8x2 -> 3x1
-void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* dst_ptr, int dst_width) {
-  intptr_t stride = src_stride;
-  int i;
-  assert((dst_width % 3 == 0) && (dst_width > 0));
-  for (i = 0; i < dst_width; i += 3) {
-    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
-        src_ptr[stride + 0] + src_ptr[stride + 1] +
-        src_ptr[stride + 2]) * (65536 / 6) >> 16;
-    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
-        src_ptr[stride + 3] + src_ptr[stride + 4] +
-        src_ptr[stride + 5]) * (65536 / 6) >> 16;
-    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
-        src_ptr[stride + 6] + src_ptr[stride + 7]) *
-        (65536 / 4) >> 16;
-    src_ptr += 8;
-    dst_ptr += 3;
-  }
-}
-
-void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
-                    uint16* dst_ptr, int src_width, int src_height) {
-  int x;
-  assert(src_width > 0);
-  assert(src_height > 0);
-  for (x = 0; x < src_width; ++x) {
-    const uint8* s = src_ptr + x;
-    unsigned int sum = 0u;
-    int y;
-    for (y = 0; y < src_height; ++y) {
-      sum += s[0];
-      s += src_stride;
-    }
-    // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
-    dst_ptr[x] = sum < 65535u ? sum : 65535u;
-  }
-}
-
-void ScaleARGBRowDown2_C(const uint8* src_argb,
-                         ptrdiff_t src_stride,
-                         uint8* dst_argb, int dst_width) {
-  const uint32* src = (const uint32*)(src_argb);
-  uint32* dst = (uint32*)(dst_argb);
-
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = src[1];
-    dst[1] = src[3];
-    src += 4;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    dst[0] = src[1];
-  }
-}
-
-void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               uint8* dst_argb, int dst_width) {
-  int x;
-  for (x = 0; x < dst_width; ++x) {
-    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
-    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
-    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
-    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
-    src_argb += 8;
-    dst_argb += 4;
-  }
-}
-
-void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
-                            uint8* dst_argb, int dst_width) {
-  int x;
-  for (x = 0; x < dst_width; ++x) {
-    dst_argb[0] = (src_argb[0] + src_argb[4] +
-                  src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
-    dst_argb[1] = (src_argb[1] + src_argb[5] +
-                  src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
-    dst_argb[2] = (src_argb[2] + src_argb[6] +
-                  src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
-    dst_argb[3] = (src_argb[3] + src_argb[7] +
-                  src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
-    src_argb += 8;
-    dst_argb += 4;
-  }
-}
-
-void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
-                            int src_stepx,
-                            uint8* dst_argb, int dst_width) {
-  const uint32* src = (const uint32*)(src_argb);
-  uint32* dst = (uint32*)(dst_argb);
-
-  int x;
-  for (x = 0; x < dst_width - 1; x += 2) {
-    dst[0] = src[0];
-    dst[1] = src[src_stepx];
-    src += src_stepx * 2;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    dst[0] = src[0];
-  }
-}
-
-void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               int src_stepx,
-                               uint8* dst_argb, int dst_width) {
-  int x;
-  for (x = 0; x < dst_width; ++x) {
-    dst_argb[0] = (src_argb[0] + src_argb[4] +
-                  src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
-    dst_argb[1] = (src_argb[1] + src_argb[5] +
-                  src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
-    dst_argb[2] = (src_argb[2] + src_argb[6] +
-                  src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
-    dst_argb[3] = (src_argb[3] + src_argb[7] +
-                  src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
-    src_argb += src_stepx * 4;
-    dst_argb += 4;
-  }
-}
-
-// Scales a single row of pixels using point sampling.
-void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
-                     int dst_width, int x, int dx) {
-  const uint32* src = (const uint32*)(src_argb);
-  uint32* dst = (uint32*)(dst_argb);
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    dst[0] = src[x >> 16];
-    x += dx;
-    dst[1] = src[x >> 16];
-    x += dx;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    dst[0] = src[x >> 16];
-  }
-}
-
-void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
-                       int dst_width, int x32, int dx) {
-  int64 x = (int64)(x32);
-  const uint32* src = (const uint32*)(src_argb);
-  uint32* dst = (uint32*)(dst_argb);
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    dst[0] = src[x >> 16];
-    x += dx;
-    dst[1] = src[x >> 16];
-    x += dx;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    dst[0] = src[x >> 16];
-  }
-}
-
-// Scales a single row of pixels up by 2x using point sampling.
-void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
-                        int dst_width, int x, int dx) {
-  const uint32* src = (const uint32*)(src_argb);
-  uint32* dst = (uint32*)(dst_argb);
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    dst[1] = dst[0] = src[0];
-    src += 1;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    dst[0] = src[0];
-  }
-}
-
-// Mimics SSSE3 blender
-#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
-#define BLENDERC(a, b, f, s) (uint32)( \
-    BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
-#define BLENDER(a, b, f) \
-    BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
-    BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
-
-void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
-                           int dst_width, int x, int dx) {
-  const uint32* src = (const uint32*)(src_argb);
-  uint32* dst = (uint32*)(dst_argb);
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    int xi = x >> 16;
-    int xf = (x >> 9) & 0x7f;
-    uint32 a = src[xi];
-    uint32 b = src[xi + 1];
-    dst[0] = BLENDER(a, b, xf);
-    x += dx;
-    xi = x >> 16;
-    xf = (x >> 9) & 0x7f;
-    a = src[xi];
-    b = src[xi + 1];
-    dst[1] = BLENDER(a, b, xf);
-    x += dx;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    int xi = x >> 16;
-    int xf = (x >> 9) & 0x7f;
-    uint32 a = src[xi];
-    uint32 b = src[xi + 1];
-    dst[0] = BLENDER(a, b, xf);
-  }
-}
-
-void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
-                             int dst_width, int x32, int dx) {
-  int64 x = (int64)(x32);
-  const uint32* src = (const uint32*)(src_argb);
-  uint32* dst = (uint32*)(dst_argb);
-  int j;
-  for (j = 0; j < dst_width - 1; j += 2) {
-    int64 xi = x >> 16;
-    int xf = (x >> 9) & 0x7f;
-    uint32 a = src[xi];
-    uint32 b = src[xi + 1];
-    dst[0] = BLENDER(a, b, xf);
-    x += dx;
-    xi = x >> 16;
-    xf = (x >> 9) & 0x7f;
-    a = src[xi];
-    b = src[xi + 1];
-    dst[1] = BLENDER(a, b, xf);
-    x += dx;
-    dst += 2;
-  }
-  if (dst_width & 1) {
-    int64 xi = x >> 16;
-    int xf = (x >> 9) & 0x7f;
-    uint32 a = src[xi];
-    uint32 b = src[xi + 1];
-    dst[0] = BLENDER(a, b, xf);
-  }
-}
-#undef BLENDER1
-#undef BLENDERC
-#undef BLENDER
-
-// Scale plane vertically with bilinear interpolation.
-void ScalePlaneVertical(int src_height,
-                        int dst_width, int dst_height,
-                        int src_stride, int dst_stride,
-                        const uint8* src_argb, uint8* dst_argb,
-                        int x, int y, int dy,
-                        int bpp, enum FilterMode filtering) {
-  // TODO(fbarchard): Allow higher bpp.
-  int dst_width_bytes = dst_width * bpp;
-  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
-      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
-      InterpolateRow_C;
-  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
-  int j;
-  assert(bpp >= 1 && bpp <= 4);
-  assert(src_height != 0);
-  assert(dst_width > 0);
-  assert(dst_height > 0);
-  src_argb += (x >> 16) * bpp;
-#if defined(HAS_INTERPOLATEROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSE2;
-    if (IS_ALIGNED(dst_width_bytes, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSE2;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
-          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSE2;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
-    InterpolateRow = InterpolateRow_Any_SSSE3;
-    if (IS_ALIGNED(dst_width_bytes, 16)) {
-      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
-      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
-          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
-        InterpolateRow = InterpolateRow_SSSE3;
-      }
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
-    InterpolateRow = InterpolateRow_Any_AVX2;
-    if (IS_ALIGNED(dst_width_bytes, 32)) {
-      InterpolateRow = InterpolateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
-    InterpolateRow = InterpolateRow_Any_NEON;
-    if (IS_ALIGNED(dst_width_bytes, 16)) {
-      InterpolateRow = InterpolateRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
-      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
-    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
-    if (IS_ALIGNED(dst_width_bytes, 4)) {
-      InterpolateRow = InterpolateRow_MIPS_DSPR2;
-    }
-  }
-#endif
-  for (j = 0; j < dst_height; ++j) {
-    int yi;
-    int yf;
-    if (y > max_y) {
-      y = max_y;
-    }
-    yi = y >> 16;
-    yf = filtering ? ((y >> 8) & 255) : 0;
-    InterpolateRow(dst_argb, src_argb + yi * src_stride,
-                   src_stride, dst_width_bytes, yf);
-    dst_argb += dst_stride;
-    y += dy;
-  }
-}
-
-// Simplify the filtering based on scale factors.
-enum FilterMode ScaleFilterReduce(int src_width, int src_height,
-                                  int dst_width, int dst_height,
-                                  enum FilterMode filtering) {
-  if (src_width < 0) {
-    src_width = -src_width;
-  }
-  if (src_height < 0) {
-    src_height = -src_height;
-  }
-  if (filtering == kFilterBox) {
-    // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
-    if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
-      filtering = kFilterBilinear;
-    }
-    // If scaling to larger, switch from Box to Bilinear.
-    if (dst_width >= src_width || dst_height >= src_height) {
-      filtering = kFilterBilinear;
-    }
-  }
-  if (filtering == kFilterBilinear) {
-    if (src_height == 1) {
-      filtering = kFilterLinear;
-    }
-    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
-    if (dst_height == src_height || dst_height * 3 == src_height) {
-      filtering = kFilterLinear;
-    }
-    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
-    // avoid reading 2 pixels horizontally that causes memory exception.
-    if (src_width == 1) {
-      filtering = kFilterNone;
-    }
-  }
-  if (filtering == kFilterLinear) {
-    if (src_width == 1) {
-      filtering = kFilterNone;
-    }
-    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
-    if (dst_width == src_width || dst_width * 3 == src_width) {
-      filtering = kFilterNone;
-    }
-  }
-  return filtering;
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_C(int num, int div) {
-  return (int)(((int64)(num) << 16) / div);
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv1_C(int num, int div) {
-  return (int)((((int64)(num) << 16) - 0x00010001) /
-                          (div - 1));
-}
-
-#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
-
-// Compute slope values for stepping.
-void ScaleSlope(int src_width, int src_height,
-                int dst_width, int dst_height,
-                enum FilterMode filtering,
-                int* x, int* y, int* dx, int* dy) {
-  assert(x != NULL);
-  assert(y != NULL);
-  assert(dx != NULL);
-  assert(dy != NULL);
-  assert(src_width != 0);
-  assert(src_height != 0);
-  assert(dst_width > 0);
-  assert(dst_height > 0);
-  // Check for 1 pixel and avoid FixedDiv overflow.
-  if (dst_width == 1 && src_width >= 32768) {
-    dst_width = src_width;
-  }
-  if (dst_height == 1 && src_height >= 32768) {
-    dst_height = src_height;
-  }
-  if (filtering == kFilterBox) {
-    // Scale step for point sampling duplicates all pixels equally.
-    *dx = FixedDiv(Abs(src_width), dst_width);
-    *dy = FixedDiv(src_height, dst_height);
-    *x = 0;
-    *y = 0;
-  } else if (filtering == kFilterBilinear) {
-    // Scale step for bilinear sampling renders last pixel once for upsample.
-    if (dst_width <= Abs(src_width)) {
-      *dx = FixedDiv(Abs(src_width), dst_width);
-      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
-    } else if (dst_width > 1) {
-      *dx = FixedDiv1(Abs(src_width), dst_width);
-      *x = 0;
-    }
-    if (dst_height <= src_height) {
-      *dy = FixedDiv(src_height,  dst_height);
-      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
-    } else if (dst_height > 1) {
-      *dy = FixedDiv1(src_height, dst_height);
-      *y = 0;
-    }
-  } else if (filtering == kFilterLinear) {
-    // Scale step for bilinear sampling renders last pixel once for upsample.
-    if (dst_width <= Abs(src_width)) {
-      *dx = FixedDiv(Abs(src_width), dst_width);
-      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
-    } else if (dst_width > 1) {
-      *dx = FixedDiv1(Abs(src_width), dst_width);
-      *x = 0;
-    }
-    *dy = FixedDiv(src_height, dst_height);
-    *y = *dy >> 1;
-  } else {
-    // Scale step for point sampling duplicates all pixels equally.
-    *dx = FixedDiv(Abs(src_width), dst_width);
-    *dy = FixedDiv(src_height, dst_height);
-    *x = CENTERSTART(*dx, 0);
-    *y = CENTERSTART(*dy, 0);
-  }
-  // Negative src_width means horizontally mirror.
-  if (src_width < 0) {
-    *x += (dst_width - 1) * *dx;
-    *dx = -*dx;
-    // src_width = -src_width;   // Caller must do this.
-  }
-}
-#undef CENTERSTART
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc
deleted file mode 100755
index 4572f4504e..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc
+++ /dev/null
@@ -1,653 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC MIPS DSPR2
-#if !defined(LIBYUV_DISABLE_MIPS) && \
-    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst, int dst_width) {
-  __asm__ __volatile__(
-    ".set push                                     \n"
-    ".set noreorder                                \n"
-
-    "srl            $t9, %[dst_width], 4           \n"  // iterations -> by 16
-    "beqz           $t9, 2f                        \n"
-    " nop                                          \n"
-
-    ".p2align       2                              \n"
-  "1:                                              \n"
-    "lw             $t0, 0(%[src_ptr])             \n"  // |3|2|1|0|
-    "lw             $t1, 4(%[src_ptr])             \n"  // |7|6|5|4|
-    "lw             $t2, 8(%[src_ptr])             \n"  // |11|10|9|8|
-    "lw             $t3, 12(%[src_ptr])            \n"  // |15|14|13|12|
-    "lw             $t4, 16(%[src_ptr])            \n"  // |19|18|17|16|
-    "lw             $t5, 20(%[src_ptr])            \n"  // |23|22|21|20|
-    "lw             $t6, 24(%[src_ptr])            \n"  // |27|26|25|24|
-    "lw             $t7, 28(%[src_ptr])            \n"  // |31|30|29|28|
-    // TODO(fbarchard): Use odd pixels instead of even.
-    "precr.qb.ph    $t8, $t1, $t0                  \n"  // |6|4|2|0|
-    "precr.qb.ph    $t0, $t3, $t2                  \n"  // |14|12|10|8|
-    "precr.qb.ph    $t1, $t5, $t4                  \n"  // |22|20|18|16|
-    "precr.qb.ph    $t2, $t7, $t6                  \n"  // |30|28|26|24|
-    "addiu          %[src_ptr], %[src_ptr], 32     \n"
-    "addiu          $t9, $t9, -1                   \n"
-    "sw             $t8, 0(%[dst])                 \n"
-    "sw             $t0, 4(%[dst])                 \n"
-    "sw             $t1, 8(%[dst])                 \n"
-    "sw             $t2, 12(%[dst])                \n"
-    "bgtz           $t9, 1b                        \n"
-    " addiu         %[dst], %[dst], 16             \n"
-
-  "2:                                              \n"
-    "andi           $t9, %[dst_width], 0xf         \n"  // residue
-    "beqz           $t9, 3f                        \n"
-    " nop                                          \n"
-
-  "21:                                             \n"
-    "lbu            $t0, 0(%[src_ptr])             \n"
-    "addiu          %[src_ptr], %[src_ptr], 2      \n"
-    "addiu          $t9, $t9, -1                   \n"
-    "sb             $t0, 0(%[dst])                 \n"
-    "bgtz           $t9, 21b                       \n"
-    " addiu         %[dst], %[dst], 1              \n"
-
-  "3:                                              \n"
-    ".set pop                                      \n"
-  : [src_ptr] "+r" (src_ptr),
-    [dst] "+r" (dst)
-  : [dst_width] "r" (dst_width)
-  : "t0", "t1", "t2", "t3", "t4", "t5",
-    "t6", "t7", "t8", "t9"
-  );
-}
-
-void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                 uint8* dst, int dst_width) {
-  const uint8* t = src_ptr + src_stride;
-
-  __asm__ __volatile__ (
-    ".set push                                    \n"
-    ".set noreorder                               \n"
-
-    "srl            $t9, %[dst_width], 3          \n"  // iterations -> step 8
-    "bltz           $t9, 2f                       \n"
-    " nop                                         \n"
-
-    ".p2align       2                             \n"
-  "1:                                             \n"
-    "lw             $t0, 0(%[src_ptr])            \n"  // |3|2|1|0|
-    "lw             $t1, 4(%[src_ptr])            \n"  // |7|6|5|4|
-    "lw             $t2, 8(%[src_ptr])            \n"  // |11|10|9|8|
-    "lw             $t3, 12(%[src_ptr])           \n"  // |15|14|13|12|
-    "lw             $t4, 0(%[t])                  \n"  // |19|18|17|16|
-    "lw             $t5, 4(%[t])                  \n"  // |23|22|21|20|
-    "lw             $t6, 8(%[t])                  \n"  // |27|26|25|24|
-    "lw             $t7, 12(%[t])                 \n"  // |31|30|29|28|
-    "addiu          $t9, $t9, -1                  \n"
-    "srl            $t8, $t0, 16                  \n"  // |X|X|3|2|
-    "ins            $t0, $t4, 16, 16              \n"  // |17|16|1|0|
-    "ins            $t4, $t8, 0, 16               \n"  // |19|18|3|2|
-    "raddu.w.qb     $t0, $t0                      \n"  // |17+16+1+0|
-    "raddu.w.qb     $t4, $t4                      \n"  // |19+18+3+2|
-    "shra_r.w       $t0, $t0, 2                   \n"  // |t0+2|>>2
-    "shra_r.w       $t4, $t4, 2                   \n"  // |t4+2|>>2
-    "srl            $t8, $t1, 16                  \n"  // |X|X|7|6|
-    "ins            $t1, $t5, 16, 16              \n"  // |21|20|5|4|
-    "ins            $t5, $t8, 0, 16               \n"  // |22|23|7|6|
-    "raddu.w.qb     $t1, $t1                      \n"  // |21+20+5+4|
-    "raddu.w.qb     $t5, $t5                      \n"  // |23+22+7+6|
-    "shra_r.w       $t1, $t1, 2                   \n"  // |t1+2|>>2
-    "shra_r.w       $t5, $t5, 2                   \n"  // |t5+2|>>2
-    "srl            $t8, $t2, 16                  \n"  // |X|X|11|10|
-    "ins            $t2, $t6, 16, 16              \n"  // |25|24|9|8|
-    "ins            $t6, $t8, 0, 16               \n"  // |27|26|11|10|
-    "raddu.w.qb     $t2, $t2                      \n"  // |25+24+9+8|
-    "raddu.w.qb     $t6, $t6                      \n"  // |27+26+11+10|
-    "shra_r.w       $t2, $t2, 2                   \n"  // |t2+2|>>2
-    "shra_r.w       $t6, $t6, 2                   \n"  // |t5+2|>>2
-    "srl            $t8, $t3, 16                  \n"  // |X|X|15|14|
-    "ins            $t3, $t7, 16, 16              \n"  // |29|28|13|12|
-    "ins            $t7, $t8, 0, 16               \n"  // |31|30|15|14|
-    "raddu.w.qb     $t3, $t3                      \n"  // |29+28+13+12|
-    "raddu.w.qb     $t7, $t7                      \n"  // |31+30+15+14|
-    "shra_r.w       $t3, $t3, 2                   \n"  // |t3+2|>>2
-    "shra_r.w       $t7, $t7, 2                   \n"  // |t7+2|>>2
-    "addiu          %[src_ptr], %[src_ptr], 16    \n"
-    "addiu          %[t], %[t], 16                \n"
-    "sb             $t0, 0(%[dst])                \n"
-    "sb             $t4, 1(%[dst])                \n"
-    "sb             $t1, 2(%[dst])                \n"
-    "sb             $t5, 3(%[dst])                \n"
-    "sb             $t2, 4(%[dst])                \n"
-    "sb             $t6, 5(%[dst])                \n"
-    "sb             $t3, 6(%[dst])                \n"
-    "sb             $t7, 7(%[dst])                \n"
-    "bgtz           $t9, 1b                       \n"
-    " addiu         %[dst], %[dst], 8             \n"
-
-  "2:                                             \n"
-    "andi           $t9, %[dst_width], 0x7        \n"  // x = residue
-    "beqz           $t9, 3f                       \n"
-    " nop                                         \n"
-
-    "21:                                          \n"
-    "lwr            $t1, 0(%[src_ptr])            \n"
-    "lwl            $t1, 3(%[src_ptr])            \n"
-    "lwr            $t2, 0(%[t])                  \n"
-    "lwl            $t2, 3(%[t])                  \n"
-    "srl            $t8, $t1, 16                  \n"
-    "ins            $t1, $t2, 16, 16              \n"
-    "ins            $t2, $t8, 0, 16               \n"
-    "raddu.w.qb     $t1, $t1                      \n"
-    "raddu.w.qb     $t2, $t2                      \n"
-    "shra_r.w       $t1, $t1, 2                   \n"
-    "shra_r.w       $t2, $t2, 2                   \n"
-    "sb             $t1, 0(%[dst])                \n"
-    "sb             $t2, 1(%[dst])                \n"
-    "addiu          %[src_ptr], %[src_ptr], 4     \n"
-    "addiu          $t9, $t9, -2                  \n"
-    "addiu          %[t], %[t], 4                 \n"
-    "bgtz           $t9, 21b                      \n"
-    " addiu         %[dst], %[dst], 2             \n"
-
-  "3:                                             \n"
-    ".set pop                                     \n"
-
-  : [src_ptr] "+r" (src_ptr),
-    [dst] "+r" (dst), [t] "+r" (t)
-  : [dst_width] "r" (dst_width)
-  : "t0", "t1", "t2", "t3", "t4", "t5",
-    "t6", "t7", "t8", "t9"
-  );
-}
-
-void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst, int dst_width) {
-  __asm__ __volatile__ (
-      ".set push                                    \n"
-      ".set noreorder                               \n"
-
-      "srl            $t9, %[dst_width], 3          \n"
-      "beqz           $t9, 2f                       \n"
-      " nop                                         \n"
-
-      ".p2align       2                             \n"
-     "1:                                            \n"
-      "lw             $t1, 0(%[src_ptr])            \n"  // |3|2|1|0|
-      "lw             $t2, 4(%[src_ptr])            \n"  // |7|6|5|4|
-      "lw             $t3, 8(%[src_ptr])            \n"  // |11|10|9|8|
-      "lw             $t4, 12(%[src_ptr])           \n"  // |15|14|13|12|
-      "lw             $t5, 16(%[src_ptr])           \n"  // |19|18|17|16|
-      "lw             $t6, 20(%[src_ptr])           \n"  // |23|22|21|20|
-      "lw             $t7, 24(%[src_ptr])           \n"  // |27|26|25|24|
-      "lw             $t8, 28(%[src_ptr])           \n"  // |31|30|29|28|
-      "precr.qb.ph    $t1, $t2, $t1                 \n"  // |6|4|2|0|
-      "precr.qb.ph    $t2, $t4, $t3                 \n"  // |14|12|10|8|
-      "precr.qb.ph    $t5, $t6, $t5                 \n"  // |22|20|18|16|
-      "precr.qb.ph    $t6, $t8, $t7                 \n"  // |30|28|26|24|
-      "precr.qb.ph    $t1, $t2, $t1                 \n"  // |12|8|4|0|
-      "precr.qb.ph    $t5, $t6, $t5                 \n"  // |28|24|20|16|
-      "addiu          %[src_ptr], %[src_ptr], 32    \n"
-      "addiu          $t9, $t9, -1                  \n"
-      "sw             $t1, 0(%[dst])                \n"
-      "sw             $t5, 4(%[dst])                \n"
-      "bgtz           $t9, 1b                       \n"
-      " addiu         %[dst], %[dst], 8             \n"
-
-    "2:                                             \n"
-      "andi           $t9, %[dst_width], 7          \n"  // residue
-      "beqz           $t9, 3f                       \n"
-      " nop                                         \n"
-
-    "21:                                            \n"
-      "lbu            $t1, 0(%[src_ptr])            \n"
-      "addiu          %[src_ptr], %[src_ptr], 4     \n"
-      "addiu          $t9, $t9, -1                  \n"
-      "sb             $t1, 0(%[dst])                \n"
-      "bgtz           $t9, 21b                      \n"
-      " addiu         %[dst], %[dst], 1             \n"
-
-    "3:                                             \n"
-      ".set pop                                     \n"
-      : [src_ptr] "+r" (src_ptr),
-        [dst] "+r" (dst)
-      : [dst_width] "r" (dst_width)
-      : "t1", "t2", "t3", "t4", "t5",
-        "t6", "t7", "t8", "t9"
-  );
-}
-
-void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                 uint8* dst, int dst_width) {
-  intptr_t stride = src_stride;
-  const uint8* s1 = src_ptr + stride;
-  const uint8* s2 = s1 + stride;
-  const uint8* s3 = s2 + stride;
-
-  __asm__ __volatile__ (
-      ".set push                                  \n"
-      ".set noreorder                             \n"
-
-      "srl           $t9, %[dst_width], 1         \n"
-      "andi          $t8, %[dst_width], 1         \n"
-
-      ".p2align      2                            \n"
-     "1:                                          \n"
-      "lw            $t0, 0(%[src_ptr])           \n"  // |3|2|1|0|
-      "lw            $t1, 0(%[s1])                \n"  // |7|6|5|4|
-      "lw            $t2, 0(%[s2])                \n"  // |11|10|9|8|
-      "lw            $t3, 0(%[s3])                \n"  // |15|14|13|12|
-      "lw            $t4, 4(%[src_ptr])           \n"  // |19|18|17|16|
-      "lw            $t5, 4(%[s1])                \n"  // |23|22|21|20|
-      "lw            $t6, 4(%[s2])                \n"  // |27|26|25|24|
-      "lw            $t7, 4(%[s3])                \n"  // |31|30|29|28|
-      "raddu.w.qb    $t0, $t0                     \n"  // |3 + 2 + 1 + 0|
-      "raddu.w.qb    $t1, $t1                     \n"  // |7 + 6 + 5 + 4|
-      "raddu.w.qb    $t2, $t2                     \n"  // |11 + 10 + 9 + 8|
-      "raddu.w.qb    $t3, $t3                     \n"  // |15 + 14 + 13 + 12|
-      "raddu.w.qb    $t4, $t4                     \n"  // |19 + 18 + 17 + 16|
-      "raddu.w.qb    $t5, $t5                     \n"  // |23 + 22 + 21 + 20|
-      "raddu.w.qb    $t6, $t6                     \n"  // |27 + 26 + 25 + 24|
-      "raddu.w.qb    $t7, $t7                     \n"  // |31 + 30 + 29 + 28|
-      "add           $t0, $t0, $t1                \n"
-      "add           $t1, $t2, $t3                \n"
-      "add           $t0, $t0, $t1                \n"
-      "add           $t4, $t4, $t5                \n"
-      "add           $t6, $t6, $t7                \n"
-      "add           $t4, $t4, $t6                \n"
-      "shra_r.w      $t0, $t0, 4                  \n"
-      "shra_r.w      $t4, $t4, 4                  \n"
-      "sb            $t0, 0(%[dst])               \n"
-      "sb            $t4, 1(%[dst])               \n"
-      "addiu         %[src_ptr], %[src_ptr], 8    \n"
-      "addiu         %[s1], %[s1], 8              \n"
-      "addiu         %[s2], %[s2], 8              \n"
-      "addiu         %[s3], %[s3], 8              \n"
-      "addiu         $t9, $t9, -1                 \n"
-      "bgtz          $t9, 1b                      \n"
-      " addiu        %[dst], %[dst], 2            \n"
-      "beqz          $t8, 2f                      \n"
-      " nop                                       \n"
-
-      "lw            $t0, 0(%[src_ptr])           \n"  // |3|2|1|0|
-      "lw            $t1, 0(%[s1])                \n"  // |7|6|5|4|
-      "lw            $t2, 0(%[s2])                \n"  // |11|10|9|8|
-      "lw            $t3, 0(%[s3])                \n"  // |15|14|13|12|
-      "raddu.w.qb    $t0, $t0                     \n"  // |3 + 2 + 1 + 0|
-      "raddu.w.qb    $t1, $t1                     \n"  // |7 + 6 + 5 + 4|
-      "raddu.w.qb    $t2, $t2                     \n"  // |11 + 10 + 9 + 8|
-      "raddu.w.qb    $t3, $t3                     \n"  // |15 + 14 + 13 + 12|
-      "add           $t0, $t0, $t1                \n"
-      "add           $t1, $t2, $t3                \n"
-      "add           $t0, $t0, $t1                \n"
-      "shra_r.w      $t0, $t0, 4                  \n"
-      "sb            $t0, 0(%[dst])               \n"
-
-      "2:                                         \n"
-      ".set pop                                   \n"
-
-      : [src_ptr] "+r" (src_ptr),
-        [dst] "+r" (dst),
-        [s1] "+r" (s1),
-        [s2] "+r" (s2),
-        [s3] "+r" (s3)
-      : [dst_width] "r" (dst_width)
-      : "t0", "t1", "t2", "t3", "t4", "t5",
-        "t6","t7", "t8", "t9"
-  );
-}
-
-void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width) {
-  __asm__ __volatile__ (
-      ".set push                                          \n"
-      ".set noreorder                                     \n"
-      ".p2align        2                                  \n"
-    "1:                                                   \n"
-      "lw              $t1, 0(%[src_ptr])                 \n"  // |3|2|1|0|
-      "lw              $t2, 4(%[src_ptr])                 \n"  // |7|6|5|4|
-      "lw              $t3, 8(%[src_ptr])                 \n"  // |11|10|9|8|
-      "lw              $t4, 12(%[src_ptr])                \n"  // |15|14|13|12|
-      "lw              $t5, 16(%[src_ptr])                \n"  // |19|18|17|16|
-      "lw              $t6, 20(%[src_ptr])                \n"  // |23|22|21|20|
-      "lw              $t7, 24(%[src_ptr])                \n"  // |27|26|25|24|
-      "lw              $t8, 28(%[src_ptr])                \n"  // |31|30|29|28|
-      "precrq.qb.ph    $t0, $t2, $t4                      \n"  // |7|5|15|13|
-      "precrq.qb.ph    $t9, $t6, $t8                      \n"  // |23|21|31|30|
-      "addiu           %[dst_width], %[dst_width], -24    \n"
-      "ins             $t1, $t1, 8, 16                    \n"  // |3|1|0|X|
-      "ins             $t4, $t0, 8, 16                    \n"  // |X|15|13|12|
-      "ins             $t5, $t5, 8, 16                    \n"  // |19|17|16|X|
-      "ins             $t8, $t9, 8, 16                    \n"  // |X|31|29|28|
-      "addiu           %[src_ptr], %[src_ptr], 32         \n"
-      "packrl.ph       $t0, $t3, $t0                      \n"  // |9|8|7|5|
-      "packrl.ph       $t9, $t7, $t9                      \n"  // |25|24|23|21|
-      "prepend         $t1, $t2, 8                        \n"  // |4|3|1|0|
-      "prepend         $t3, $t4, 24                       \n"  // |15|13|12|11|
-      "prepend         $t5, $t6, 8                        \n"  // |20|19|17|16|
-      "prepend         $t7, $t8, 24                       \n"  // |31|29|28|27|
-      "sw              $t1, 0(%[dst])                     \n"
-      "sw              $t0, 4(%[dst])                     \n"
-      "sw              $t3, 8(%[dst])                     \n"
-      "sw              $t5, 12(%[dst])                    \n"
-      "sw              $t9, 16(%[dst])                    \n"
-      "sw              $t7, 20(%[dst])                    \n"
-      "bnez            %[dst_width], 1b                   \n"
-      " addiu          %[dst], %[dst], 24                 \n"
-      ".set pop                                           \n"
-      : [src_ptr] "+r" (src_ptr),
-        [dst] "+r" (dst),
-        [dst_width] "+r" (dst_width)
-      :
-      : "t0", "t1", "t2", "t3", "t4", "t5",
-        "t6","t7", "t8", "t9"
-  );
-}
-
-void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* d, int dst_width) {
-  __asm__ __volatile__ (
-      ".set push                                         \n"
-      ".set noreorder                                    \n"
-      "repl.ph           $t3, 3                          \n"  // 0x00030003
-
-     ".p2align           2                               \n"
-    "1:                                                  \n"
-      "lw                $t0, 0(%[src_ptr])              \n"  // |S3|S2|S1|S0|
-      "lwx               $t1, %[src_stride](%[src_ptr])  \n"  // |T3|T2|T1|T0|
-      "rotr              $t2, $t0, 8                     \n"  // |S0|S3|S2|S1|
-      "rotr              $t6, $t1, 8                     \n"  // |T0|T3|T2|T1|
-      "muleu_s.ph.qbl    $t4, $t2, $t3                   \n"  // |S0*3|S3*3|
-      "muleu_s.ph.qbl    $t5, $t6, $t3                   \n"  // |T0*3|T3*3|
-      "andi              $t0, $t2, 0xFFFF                \n"  // |0|0|S2|S1|
-      "andi              $t1, $t6, 0xFFFF                \n"  // |0|0|T2|T1|
-      "raddu.w.qb        $t0, $t0                        \n"
-      "raddu.w.qb        $t1, $t1                        \n"
-      "shra_r.w          $t0, $t0, 1                     \n"
-      "shra_r.w          $t1, $t1, 1                     \n"
-      "preceu.ph.qbr     $t2, $t2                        \n"  // |0|S2|0|S1|
-      "preceu.ph.qbr     $t6, $t6                        \n"  // |0|T2|0|T1|
-      "rotr              $t2, $t2, 16                    \n"  // |0|S1|0|S2|
-      "rotr              $t6, $t6, 16                    \n"  // |0|T1|0|T2|
-      "addu.ph           $t2, $t2, $t4                   \n"
-      "addu.ph           $t6, $t6, $t5                   \n"
-      "sll               $t5, $t0, 1                     \n"
-      "add               $t0, $t5, $t0                   \n"
-      "shra_r.ph         $t2, $t2, 2                     \n"
-      "shra_r.ph         $t6, $t6, 2                     \n"
-      "shll.ph           $t4, $t2, 1                     \n"
-      "addq.ph           $t4, $t4, $t2                   \n"
-      "addu              $t0, $t0, $t1                   \n"
-      "addiu             %[src_ptr], %[src_ptr], 4       \n"
-      "shra_r.w          $t0, $t0, 2                     \n"
-      "addu.ph           $t6, $t6, $t4                   \n"
-      "shra_r.ph         $t6, $t6, 2                     \n"
-      "srl               $t1, $t6, 16                    \n"
-      "addiu             %[dst_width], %[dst_width], -3  \n"
-      "sb                $t1, 0(%[d])                    \n"
-      "sb                $t0, 1(%[d])                    \n"
-      "sb                $t6, 2(%[d])                    \n"
-      "bgtz              %[dst_width], 1b                \n"
-      " addiu            %[d], %[d], 3                   \n"
-    "3:                                                  \n"
-      ".set pop                                          \n"
-      : [src_ptr] "+r" (src_ptr),
-        [src_stride] "+r" (src_stride),
-        [d] "+r" (d),
-        [dst_width] "+r" (dst_width)
-      :
-      : "t0", "t1", "t2", "t3",
-        "t4", "t5", "t6"
-  );
-}
-
-void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* d, int dst_width) {
-  __asm__ __volatile__ (
-      ".set push                                           \n"
-      ".set noreorder                                      \n"
-      "repl.ph           $t2, 3                            \n"  // 0x00030003
-
-      ".p2align          2                                 \n"
-    "1:                                                    \n"
-      "lw                $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
-      "lwx               $t1, %[src_stride](%[src_ptr])    \n"  // |T3|T2|T1|T0|
-      "rotr              $t4, $t0, 8                       \n"  // |S0|S3|S2|S1|
-      "rotr              $t6, $t1, 8                       \n"  // |T0|T3|T2|T1|
-      "muleu_s.ph.qbl    $t3, $t4, $t2                     \n"  // |S0*3|S3*3|
-      "muleu_s.ph.qbl    $t5, $t6, $t2                     \n"  // |T0*3|T3*3|
-      "andi              $t0, $t4, 0xFFFF                  \n"  // |0|0|S2|S1|
-      "andi              $t1, $t6, 0xFFFF                  \n"  // |0|0|T2|T1|
-      "raddu.w.qb        $t0, $t0                          \n"
-      "raddu.w.qb        $t1, $t1                          \n"
-      "shra_r.w          $t0, $t0, 1                       \n"
-      "shra_r.w          $t1, $t1, 1                       \n"
-      "preceu.ph.qbr     $t4, $t4                          \n"  // |0|S2|0|S1|
-      "preceu.ph.qbr     $t6, $t6                          \n"  // |0|T2|0|T1|
-      "rotr              $t4, $t4, 16                      \n"  // |0|S1|0|S2|
-      "rotr              $t6, $t6, 16                      \n"  // |0|T1|0|T2|
-      "addu.ph           $t4, $t4, $t3                     \n"
-      "addu.ph           $t6, $t6, $t5                     \n"
-      "shra_r.ph         $t6, $t6, 2                       \n"
-      "shra_r.ph         $t4, $t4, 2                       \n"
-      "addu.ph           $t6, $t6, $t4                     \n"
-      "addiu             %[src_ptr], %[src_ptr], 4         \n"
-      "shra_r.ph         $t6, $t6, 1                       \n"
-      "addu              $t0, $t0, $t1                     \n"
-      "addiu             %[dst_width], %[dst_width], -3    \n"
-      "shra_r.w          $t0, $t0, 1                       \n"
-      "srl               $t1, $t6, 16                      \n"
-      "sb                $t1, 0(%[d])                      \n"
-      "sb                $t0, 1(%[d])                      \n"
-      "sb                $t6, 2(%[d])                      \n"
-      "bgtz              %[dst_width], 1b                  \n"
-      " addiu            %[d], %[d], 3                     \n"
-    "3:                                                    \n"
-      ".set pop                                            \n"
-      : [src_ptr] "+r" (src_ptr),
-        [src_stride] "+r" (src_stride),
-        [d] "+r" (d),
-        [dst_width] "+r" (dst_width)
-      :
-      : "t0", "t1", "t2", "t3",
-        "t4", "t5", "t6"
-  );
-}
-
-void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width) {
-  __asm__ __volatile__ (
-      ".set push                                     \n"
-      ".set noreorder                                \n"
-
-      ".p2align   2                                  \n"
-    "1:                                              \n"
-      "lw         $t0, 0(%[src_ptr])                 \n"  // |3|2|1|0|
-      "lw         $t1, 4(%[src_ptr])                 \n"  // |7|6|5|4|
-      "lw         $t2, 8(%[src_ptr])                 \n"  // |11|10|9|8|
-      "lw         $t3, 12(%[src_ptr])                \n"  // |15|14|13|12|
-      "lw         $t4, 16(%[src_ptr])                \n"  // |19|18|17|16|
-      "lw         $t5, 20(%[src_ptr])                \n"  // |23|22|21|20|
-      "lw         $t6, 24(%[src_ptr])                \n"  // |27|26|25|24|
-      "lw         $t7, 28(%[src_ptr])                \n"  // |31|30|29|28|
-      "wsbh       $t0, $t0                           \n"  // |2|3|0|1|
-      "wsbh       $t6, $t6                           \n"  // |26|27|24|25|
-      "srl        $t0, $t0, 8                        \n"  // |X|2|3|0|
-      "srl        $t3, $t3, 16                       \n"  // |X|X|15|14|
-      "srl        $t5, $t5, 16                       \n"  // |X|X|23|22|
-      "srl        $t7, $t7, 16                       \n"  // |X|X|31|30|
-      "ins        $t1, $t2, 24, 8                    \n"  // |8|6|5|4|
-      "ins        $t6, $t5, 0, 8                     \n"  // |26|27|24|22|
-      "ins        $t1, $t0, 0, 16                    \n"  // |8|6|3|0|
-      "ins        $t6, $t7, 24, 8                    \n"  // |30|27|24|22|
-      "prepend    $t2, $t3, 24                       \n"  // |X|15|14|11|
-      "ins        $t4, $t4, 16, 8                    \n"  // |19|16|17|X|
-      "ins        $t4, $t2, 0, 16                    \n"  // |19|16|14|11|
-      "addiu      %[src_ptr], %[src_ptr], 32         \n"
-      "addiu      %[dst_width], %[dst_width], -12    \n"
-      "addiu      $t8,%[dst_width], -12              \n"
-      "sw         $t1, 0(%[dst])                     \n"
-      "sw         $t4, 4(%[dst])                     \n"
-      "sw         $t6, 8(%[dst])                     \n"
-      "bgez       $t8, 1b                            \n"
-      " addiu     %[dst], %[dst], 12                 \n"
-      ".set pop                                      \n"
-      : [src_ptr] "+r" (src_ptr),
-        [dst] "+r" (dst),
-        [dst_width] "+r" (dst_width)
-      :
-      : "t0", "t1", "t2", "t3", "t4",
-        "t5", "t6", "t7", "t8"
-  );
-}
-
-void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width) {
-  intptr_t stride = src_stride;
-  const uint8* t = src_ptr + stride;
-  const int c = 0x2AAA;
-
-  __asm__ __volatile__ (
-      ".set push                                         \n"
-      ".set noreorder                                    \n"
-
-      ".p2align        2                                 \n"
-    "1:                                                  \n"
-      "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
-      "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4|
-      "lw              $t2, 0(%[t])                      \n"  // |T3|T2|T1|T0|
-      "lw              $t3, 4(%[t])                      \n"  // |T7|T6|T5|T4|
-      "rotr            $t1, $t1, 16                      \n"  // |S5|S4|S7|S6|
-      "packrl.ph       $t4, $t1, $t3                     \n"  // |S7|S6|T7|T6|
-      "packrl.ph       $t5, $t3, $t1                     \n"  // |T5|T4|S5|S4|
-      "raddu.w.qb      $t4, $t4                          \n"  // S7+S6+T7+T6
-      "raddu.w.qb      $t5, $t5                          \n"  // T5+T4+S5+S4
-      "precrq.qb.ph    $t6, $t0, $t2                     \n"  // |S3|S1|T3|T1|
-      "precrq.qb.ph    $t6, $t6, $t6                     \n"  // |S3|T3|S3|T3|
-      "srl             $t4, $t4, 2                       \n"  // t4 / 4
-      "srl             $t6, $t6, 16                      \n"  // |0|0|S3|T3|
-      "raddu.w.qb      $t6, $t6                          \n"  // 0+0+S3+T3
-      "addu            $t6, $t5, $t6                     \n"
-      "mul             $t6, $t6, %[c]                    \n"  // t6 * 0x2AAA
-      "sll             $t0, $t0, 8                       \n"  // |S2|S1|S0|0|
-      "sll             $t2, $t2, 8                       \n"  // |T2|T1|T0|0|
-      "raddu.w.qb      $t0, $t0                          \n"  // S2+S1+S0+0
-      "raddu.w.qb      $t2, $t2                          \n"  // T2+T1+T0+0
-      "addu            $t0, $t0, $t2                     \n"
-      "mul             $t0, $t0, %[c]                    \n"  // t0 * 0x2AAA
-      "addiu           %[src_ptr], %[src_ptr], 8         \n"
-      "addiu           %[t], %[t], 8                     \n"
-      "addiu           %[dst_width], %[dst_width], -3    \n"
-      "addiu           %[dst_ptr], %[dst_ptr], 3         \n"
-      "srl             $t6, $t6, 16                      \n"
-      "srl             $t0, $t0, 16                      \n"
-      "sb              $t4, -1(%[dst_ptr])               \n"
-      "sb              $t6, -2(%[dst_ptr])               \n"
-      "bgtz            %[dst_width], 1b                  \n"
-      " sb             $t0, -3(%[dst_ptr])               \n"
-      ".set pop                                          \n"
-      : [src_ptr] "+r" (src_ptr),
-        [dst_ptr] "+r" (dst_ptr),
-        [t] "+r" (t),
-        [dst_width] "+r" (dst_width)
-      : [c] "r" (c)
-      : "t0", "t1", "t2", "t3", "t4", "t5", "t6"
-  );
-}
-
-void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
-                                     ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width) {
-  intptr_t stride = src_stride;
-  const uint8* s1 = src_ptr + stride;
-  stride += stride;
-  const uint8* s2 = src_ptr + stride;
-  const int c1 = 0x1C71;
-  const int c2 = 0x2AAA;
-
-  __asm__ __volatile__ (
-      ".set push                                         \n"
-      ".set noreorder                                    \n"
-
-      ".p2align        2                                 \n"
-    "1:                                                  \n"
-      "lw              $t0, 0(%[src_ptr])                \n"  // |S3|S2|S1|S0|
-      "lw              $t1, 4(%[src_ptr])                \n"  // |S7|S6|S5|S4|
-      "lw              $t2, 0(%[s1])                     \n"  // |T3|T2|T1|T0|
-      "lw              $t3, 4(%[s1])                     \n"  // |T7|T6|T5|T4|
-      "lw              $t4, 0(%[s2])                     \n"  // |R3|R2|R1|R0|
-      "lw              $t5, 4(%[s2])                     \n"  // |R7|R6|R5|R4|
-      "rotr            $t1, $t1, 16                      \n"  // |S5|S4|S7|S6|
-      "packrl.ph       $t6, $t1, $t3                     \n"  // |S7|S6|T7|T6|
-      "raddu.w.qb      $t6, $t6                          \n"  // S7+S6+T7+T6
-      "packrl.ph       $t7, $t3, $t1                     \n"  // |T5|T4|S5|S4|
-      "raddu.w.qb      $t7, $t7                          \n"  // T5+T4+S5+S4
-      "sll             $t8, $t5, 16                      \n"  // |R5|R4|0|0|
-      "raddu.w.qb      $t8, $t8                          \n"  // R5+R4
-      "addu            $t7, $t7, $t8                     \n"
-      "srl             $t8, $t5, 16                      \n"  // |0|0|R7|R6|
-      "raddu.w.qb      $t8, $t8                          \n"  // R7 + R6
-      "addu            $t6, $t6, $t8                     \n"
-      "mul             $t6, $t6, %[c2]                   \n"  // t6 * 0x2AAA
-      "precrq.qb.ph    $t8, $t0, $t2                     \n"  // |S3|S1|T3|T1|
-      "precrq.qb.ph    $t8, $t8, $t4                     \n"  // |S3|T3|R3|R1|
-      "srl             $t8, $t8, 8                       \n"  // |0|S3|T3|R3|
-      "raddu.w.qb      $t8, $t8                          \n"  // S3 + T3 + R3
-      "addu            $t7, $t7, $t8                     \n"
-      "mul             $t7, $t7, %[c1]                   \n"  // t7 * 0x1C71
-      "sll             $t0, $t0, 8                       \n"  // |S2|S1|S0|0|
-      "sll             $t2, $t2, 8                       \n"  // |T2|T1|T0|0|
-      "sll             $t4, $t4, 8                       \n"  // |R2|R1|R0|0|
-      "raddu.w.qb      $t0, $t0                          \n"
-      "raddu.w.qb      $t2, $t2                          \n"
-      "raddu.w.qb      $t4, $t4                          \n"
-      "addu            $t0, $t0, $t2                     \n"
-      "addu            $t0, $t0, $t4                     \n"
-      "mul             $t0, $t0, %[c1]                   \n"  // t0 * 0x1C71
-      "addiu           %[src_ptr], %[src_ptr], 8         \n"
-      "addiu           %[s1], %[s1], 8                   \n"
-      "addiu           %[s2], %[s2], 8                   \n"
-      "addiu           %[dst_width], %[dst_width], -3    \n"
-      "addiu           %[dst_ptr], %[dst_ptr], 3         \n"
-      "srl             $t6, $t6, 16                      \n"
-      "srl             $t7, $t7, 16                      \n"
-      "srl             $t0, $t0, 16                      \n"
-      "sb              $t6, -1(%[dst_ptr])               \n"
-      "sb              $t7, -2(%[dst_ptr])               \n"
-      "bgtz            %[dst_width], 1b                  \n"
-      " sb             $t0, -3(%[dst_ptr])               \n"
-      ".set pop                                          \n"
-      : [src_ptr] "+r" (src_ptr),
-        [dst_ptr] "+r" (dst_ptr),
-        [s1] "+r" (s1),
-        [s2] "+r" (s2),
-        [dst_width] "+r" (dst_width)
-      : [c1] "r" (c1), [c2] "r" (c2)
-      : "t0", "t1", "t2", "t3", "t4",
-        "t5", "t6", "t7", "t8"
-  );
-}
-
-#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc
deleted file mode 100755
index a9df93c055..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc
+++ /dev/null
@@ -1,699 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC Neon.
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
-
-// NEON downscalers with interpolation.
-// Provided by Fritz Koenig
-
-// Read 32x1 throw away even pixels, and write 16x1.
-void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst, int dst_width) {
-  asm volatile (
-#ifdef _ANDROID
-	".fpu neon\n"
-#endif
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    // load even pixels into q0, odd into q1
-    "vld2.8     {q0, q1}, [%0]!                \n"
-    "subs       %2, %2, #16                    \n"  // 16 processed per loop
-    "vst1.8     {q1}, [%1]!                    \n"  // store odd pixels
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst),              // %1
-    "+r"(dst_width)         // %2
-  :
-  : "q0", "q1"              // Clobber List
-  );
-}
-
-// Read 32x2 average down and write 16x1.
-void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst, int dst_width) {
-  asm volatile (
-#ifdef _ANDROID
-	".fpu neon\n"
-#endif
-    // change the stride to row 2 pointer
-    "add        %1, %0                         \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0, q1}, [%0]!                \n"  // load row 1 and post inc
-    "vld1.8     {q2, q3}, [%1]!                \n"  // load row 2 and post inc
-    "subs       %3, %3, #16                    \n"  // 16 processed per loop
-    "vpaddl.u8  q0, q0                         \n"  // row 1 add adjacent
-    "vpaddl.u8  q1, q1                         \n"
-    "vpadal.u8  q0, q2                         \n"  // row 2 add adjacent + row1
-    "vpadal.u8  q1, q3                         \n"
-    "vrshrn.u16 d0, q0, #2                     \n"  // downshift, round and pack
-    "vrshrn.u16 d1, q1, #2                     \n"
-    "vst1.8     {q0}, [%2]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(src_stride),       // %1
-    "+r"(dst),              // %2
-    "+r"(dst_width)         // %3
-  :
-  : "q0", "q1", "q2", "q3"     // Clobber List
-  );
-}
-
-void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
-#ifdef _ANDROID
-	".fpu neon\n"
-#endif
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n" // src line 0
-    "subs       %2, %2, #8                     \n" // 8 processed per loop
-    "vst1.8     {d2}, [%1]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst_ptr),          // %1
-    "+r"(dst_width)         // %2
-  :
-  : "q0", "q1", "memory", "cc"
-  );
-}
-
-void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width) {
-  asm volatile (
-#ifdef _ANDROID
-	".fpu neon\n"
-#endif
-    "add        r4, %0, %3                     \n"
-    "add        r5, r4, %3                     \n"
-    "add        %3, r5, %3                     \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {q0}, [%0]!                    \n"   // load up 16x4
-    "vld1.8     {q1}, [r4]!                    \n"
-    "vld1.8     {q2}, [r5]!                    \n"
-    "vld1.8     {q3}, [%3]!                    \n"
-    "subs       %2, %2, #4                     \n"
-    "vpaddl.u8  q0, q0                         \n"
-    "vpadal.u8  q0, q1                         \n"
-    "vpadal.u8  q0, q2                         \n"
-    "vpadal.u8  q0, q3                         \n"
-    "vpaddl.u16 q0, q0                         \n"
-    "vrshrn.u32 d0, q0, #4                     \n"   // divide by 16 w/rounding
-    "vmovn.u16  d0, q0                         \n"
-    "vst1.32    {d0[0]}, [%1]!                 \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst_ptr),          // %1
-    "+r"(dst_width)         // %2
-  : "r"(src_stride)         // %3
-  : "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc"
-  );
-}
-
-// Down scale from 4 to 3 pixels. Use the neon multilane read/write
-// to load up the every 4th pixel into a 4 different registers.
-// Point samples 32 pixels to 24 pixels.
-void ScaleRowDown34_NEON(const uint8* src_ptr,
-                         ptrdiff_t src_stride,
-                         uint8* dst_ptr, int dst_width) {
-  asm volatile (
-#ifdef _ANDROID
-				".fpu neon\n"
-#endif
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d1, d2, d3}, [%0]!      \n" // src line 0
-    "subs       %2, %2, #24                  \n"
-    "vmov       d2, d3                       \n" // order d0, d1, d2
-    "vst3.8     {d0, d1, d2}, [%1]!          \n"
-    "bgt        1b                           \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst_ptr),          // %1
-    "+r"(dst_width)         // %2
-  :
-  : "d0", "d1", "d2", "d3", "memory", "cc"
-  );
-}
-
-void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
-                               ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "vmov.u8    d24, #3                        \n"
-    "add        %3, %0                         \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0
-    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n" // src line 1
-    "subs         %2, %2, #24                  \n"
-
-    // filter src line 0 with src line 1
-    // expand chars to shorts to allow for room
-    // when adding lines together
-    "vmovl.u8     q8, d4                       \n"
-    "vmovl.u8     q9, d5                       \n"
-    "vmovl.u8     q10, d6                      \n"
-    "vmovl.u8     q11, d7                      \n"
-
-    // 3 * line_0 + line_1
-    "vmlal.u8     q8, d0, d24                  \n"
-    "vmlal.u8     q9, d1, d24                  \n"
-    "vmlal.u8     q10, d2, d24                 \n"
-    "vmlal.u8     q11, d3, d24                 \n"
-
-    // (3 * line_0 + line_1) >> 2
-    "vqrshrn.u16  d0, q8, #2                   \n"
-    "vqrshrn.u16  d1, q9, #2                   \n"
-    "vqrshrn.u16  d2, q10, #2                  \n"
-    "vqrshrn.u16  d3, q11, #2                  \n"
-
-    // a0 = (src[0] * 3 + s[1] * 1) >> 2
-    "vmovl.u8     q8, d1                       \n"
-    "vmlal.u8     q8, d0, d24                  \n"
-    "vqrshrn.u16  d0, q8, #2                   \n"
-
-    // a1 = (src[1] * 1 + s[2] * 1) >> 1
-    "vrhadd.u8    d1, d1, d2                   \n"
-
-    // a2 = (src[2] * 1 + s[3] * 3) >> 2
-    "vmovl.u8     q8, d2                       \n"
-    "vmlal.u8     q8, d3, d24                  \n"
-    "vqrshrn.u16  d2, q8, #2                   \n"
-
-    "vst3.8       {d0, d1, d2}, [%1]!          \n"
-
-    "bgt          1b                           \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst_ptr),          // %1
-    "+r"(dst_width),        // %2
-    "+r"(src_stride)        // %3
-  :
-  : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc"
-  );
-}
-
-void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
-                               ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "vmov.u8    d24, #3                        \n"
-    "add        %3, %0                         \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0
-    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n" // src line 1
-    "subs         %2, %2, #24                  \n"
-    // average src line 0 with src line 1
-    "vrhadd.u8    q0, q0, q2                   \n"
-    "vrhadd.u8    q1, q1, q3                   \n"
-
-    // a0 = (src[0] * 3 + s[1] * 1) >> 2
-    "vmovl.u8     q3, d1                       \n"
-    "vmlal.u8     q3, d0, d24                  \n"
-    "vqrshrn.u16  d0, q3, #2                   \n"
-
-    // a1 = (src[1] * 1 + s[2] * 1) >> 1
-    "vrhadd.u8    d1, d1, d2                   \n"
-
-    // a2 = (src[2] * 1 + s[3] * 3) >> 2
-    "vmovl.u8     q3, d2                       \n"
-    "vmlal.u8     q3, d3, d24                  \n"
-    "vqrshrn.u16  d2, q3, #2                   \n"
-
-    "vst3.8       {d0, d1, d2}, [%1]!          \n"
-    "bgt          1b                           \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst_ptr),          // %1
-    "+r"(dst_width),        // %2
-    "+r"(src_stride)        // %3
-  :
-  : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"
-  );
-}
-
-#define HAS_SCALEROWDOWN38_NEON
-static uvec8 kShuf38 =
-  { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
-static uvec8 kShuf38_2 =
-  { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
-static vec16 kMult38_Div6 =
-  { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
-    65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
-static vec16 kMult38_Div9 =
-  { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
-    65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
-
-// 32 -> 12
-void ScaleRowDown38_NEON(const uint8* src_ptr,
-                         ptrdiff_t src_stride,
-                         uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "vld1.8     {q3}, [%3]                     \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {d0, d1, d2, d3}, [%0]!        \n"
-    "subs       %2, %2, #12                    \n"
-    "vtbl.u8    d4, {d0, d1, d2, d3}, d6       \n"
-    "vtbl.u8    d5, {d0, d1, d2, d3}, d7       \n"
-    "vst1.8     {d4}, [%1]!                    \n"
-    "vst1.32    {d5[0]}, [%1]!                 \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst_ptr),          // %1
-    "+r"(dst_width)         // %2
-  : "r"(&kShuf38)           // %3
-  : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"
-  );
-}
-
-// 32x3 -> 12x1
-void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
-                                      ptrdiff_t src_stride,
-                                      uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "vld1.16    {q13}, [%4]                    \n"
-    "vld1.8     {q14}, [%5]                    \n"
-    "vld1.8     {q15}, [%6]                    \n"
-    "add        r4, %0, %3, lsl #1             \n"
-    "add        %3, %0                         \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-
-    // d0 = 00 40 01 41 02 42 03 43
-    // d1 = 10 50 11 51 12 52 13 53
-    // d2 = 20 60 21 61 22 62 23 63
-    // d3 = 30 70 31 71 32 72 33 73
-    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n"
-    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n"
-    "vld4.8       {d16, d17, d18, d19}, [r4]!  \n"
-    "subs         %2, %2, #12                  \n"
-
-    // Shuffle the input data around to get align the data
-    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
-    // d0 = 00 10 01 11 02 12 03 13
-    // d1 = 40 50 41 51 42 52 43 53
-    "vtrn.u8      d0, d1                       \n"
-    "vtrn.u8      d4, d5                       \n"
-    "vtrn.u8      d16, d17                     \n"
-
-    // d2 = 20 30 21 31 22 32 23 33
-    // d3 = 60 70 61 71 62 72 63 73
-    "vtrn.u8      d2, d3                       \n"
-    "vtrn.u8      d6, d7                       \n"
-    "vtrn.u8      d18, d19                     \n"
-
-    // d0 = 00+10 01+11 02+12 03+13
-    // d2 = 40+50 41+51 42+52 43+53
-    "vpaddl.u8    q0, q0                       \n"
-    "vpaddl.u8    q2, q2                       \n"
-    "vpaddl.u8    q8, q8                       \n"
-
-    // d3 = 60+70 61+71 62+72 63+73
-    "vpaddl.u8    d3, d3                       \n"
-    "vpaddl.u8    d7, d7                       \n"
-    "vpaddl.u8    d19, d19                     \n"
-
-    // combine source lines
-    "vadd.u16     q0, q2                       \n"
-    "vadd.u16     q0, q8                       \n"
-    "vadd.u16     d4, d3, d7                   \n"
-    "vadd.u16     d4, d19                      \n"
-
-    // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
-    //             + s[6 + st * 1] + s[7 + st * 1]
-    //             + s[6 + st * 2] + s[7 + st * 2]) / 6
-    "vqrdmulh.s16 q2, q2, q13                  \n"
-    "vmovn.u16    d4, q2                       \n"
-
-    // Shuffle 2,3 reg around so that 2 can be added to the
-    //  0,1 reg and 3 can be added to the 4,5 reg. This
-    //  requires expanding from u8 to u16 as the 0,1 and 4,5
-    //  registers are already expanded. Then do transposes
-    //  to get aligned.
-    // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
-    "vmovl.u8     q1, d2                       \n"
-    "vmovl.u8     q3, d6                       \n"
-    "vmovl.u8     q9, d18                      \n"
-
-    // combine source lines
-    "vadd.u16     q1, q3                       \n"
-    "vadd.u16     q1, q9                       \n"
-
-    // d4 = xx 20 xx 30 xx 22 xx 32
-    // d5 = xx 21 xx 31 xx 23 xx 33
-    "vtrn.u32     d2, d3                       \n"
-
-    // d4 = xx 20 xx 21 xx 22 xx 23
-    // d5 = xx 30 xx 31 xx 32 xx 33
-    "vtrn.u16     d2, d3                       \n"
-
-    // 0+1+2, 3+4+5
-    "vadd.u16     q0, q1                       \n"
-
-    // Need to divide, but can't downshift as the the value
-    //  isn't a power of 2. So multiply by 65536 / n
-    //  and take the upper 16 bits.
-    "vqrdmulh.s16 q0, q0, q15                  \n"
-
-    // Align for table lookup, vtbl requires registers to
-    //  be adjacent
-    "vmov.u8      d2, d4                       \n"
-
-    "vtbl.u8      d3, {d0, d1, d2}, d28        \n"
-    "vtbl.u8      d4, {d0, d1, d2}, d29        \n"
-
-    "vst1.8       {d3}, [%1]!                  \n"
-    "vst1.32      {d4[0]}, [%1]!               \n"
-    "bgt          1b                           \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst_ptr),          // %1
-    "+r"(dst_width),        // %2
-    "+r"(src_stride)        // %3
-  : "r"(&kMult38_Div6),     // %4
-    "r"(&kShuf38_2),        // %5
-    "r"(&kMult38_Div9)      // %6
-  : "r4", "q0", "q1", "q2", "q3", "q8", "q9",
-    "q13", "q14", "q15", "memory", "cc"
-  );
-}
-
-// 32x2 -> 12x1
-void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
-                               ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "vld1.16    {q13}, [%4]                    \n"
-    "vld1.8     {q14}, [%5]                    \n"
-    "add        %3, %0                         \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-
-    // d0 = 00 40 01 41 02 42 03 43
-    // d1 = 10 50 11 51 12 52 13 53
-    // d2 = 20 60 21 61 22 62 23 63
-    // d3 = 30 70 31 71 32 72 33 73
-    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n"
-    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n"
-    "subs         %2, %2, #12                  \n"
-
-    // Shuffle the input data around to get align the data
-    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
-    // d0 = 00 10 01 11 02 12 03 13
-    // d1 = 40 50 41 51 42 52 43 53
-    "vtrn.u8      d0, d1                       \n"
-    "vtrn.u8      d4, d5                       \n"
-
-    // d2 = 20 30 21 31 22 32 23 33
-    // d3 = 60 70 61 71 62 72 63 73
-    "vtrn.u8      d2, d3                       \n"
-    "vtrn.u8      d6, d7                       \n"
-
-    // d0 = 00+10 01+11 02+12 03+13
-    // d2 = 40+50 41+51 42+52 43+53
-    "vpaddl.u8    q0, q0                       \n"
-    "vpaddl.u8    q2, q2                       \n"
-
-    // d3 = 60+70 61+71 62+72 63+73
-    "vpaddl.u8    d3, d3                       \n"
-    "vpaddl.u8    d7, d7                       \n"
-
-    // combine source lines
-    "vadd.u16     q0, q2                       \n"
-    "vadd.u16     d4, d3, d7                   \n"
-
-    // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
-    "vqrshrn.u16  d4, q2, #2                   \n"
-
-    // Shuffle 2,3 reg around so that 2 can be added to the
-    //  0,1 reg and 3 can be added to the 4,5 reg. This
-    //  requires expanding from u8 to u16 as the 0,1 and 4,5
-    //  registers are already expanded. Then do transposes
-    //  to get aligned.
-    // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
-    "vmovl.u8     q1, d2                       \n"
-    "vmovl.u8     q3, d6                       \n"
-
-    // combine source lines
-    "vadd.u16     q1, q3                       \n"
-
-    // d4 = xx 20 xx 30 xx 22 xx 32
-    // d5 = xx 21 xx 31 xx 23 xx 33
-    "vtrn.u32     d2, d3                       \n"
-
-    // d4 = xx 20 xx 21 xx 22 xx 23
-    // d5 = xx 30 xx 31 xx 32 xx 33
-    "vtrn.u16     d2, d3                       \n"
-
-    // 0+1+2, 3+4+5
-    "vadd.u16     q0, q1                       \n"
-
-    // Need to divide, but can't downshift as the the value
-    //  isn't a power of 2. So multiply by 65536 / n
-    //  and take the upper 16 bits.
-    "vqrdmulh.s16 q0, q0, q13                  \n"
-
-    // Align for table lookup, vtbl requires registers to
-    //  be adjacent
-    "vmov.u8      d2, d4                       \n"
-
-    "vtbl.u8      d3, {d0, d1, d2}, d28        \n"
-    "vtbl.u8      d4, {d0, d1, d2}, d29        \n"
-
-    "vst1.8       {d3}, [%1]!                  \n"
-    "vst1.32      {d4[0]}, [%1]!               \n"
-    "bgt          1b                           \n"
-  : "+r"(src_ptr),       // %0
-    "+r"(dst_ptr),       // %1
-    "+r"(dst_width),     // %2
-    "+r"(src_stride)     // %3
-  : "r"(&kMult38_Div6),  // %4
-    "r"(&kShuf38_2)      // %5
-  : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"
-  );
-}
-
-// 16x2 -> 16x1
-void ScaleFilterRows_NEON(uint8* dst_ptr,
-                          const uint8* src_ptr, ptrdiff_t src_stride,
-                          int dst_width, int source_y_fraction) {
-  asm volatile (
-    "cmp          %4, #0                       \n"
-    "beq          100f                         \n"
-    "add          %2, %1                       \n"
-    "cmp          %4, #64                      \n"
-    "beq          75f                          \n"
-    "cmp          %4, #128                     \n"
-    "beq          50f                          \n"
-    "cmp          %4, #192                     \n"
-    "beq          25f                          \n"
-
-    "vdup.8       d5, %4                       \n"
-    "rsb          %4, #256                     \n"
-    "vdup.8       d4, %4                       \n"
-    // General purpose row blend.
-  "1:                                          \n"
-    "vld1.8       {q0}, [%1]!                  \n"
-    "vld1.8       {q1}, [%2]!                  \n"
-    "subs         %3, %3, #16                  \n"
-    "vmull.u8     q13, d0, d4                  \n"
-    "vmull.u8     q14, d1, d4                  \n"
-    "vmlal.u8     q13, d2, d5                  \n"
-    "vmlal.u8     q14, d3, d5                  \n"
-    "vrshrn.u16   d0, q13, #8                  \n"
-    "vrshrn.u16   d1, q14, #8                  \n"
-    "vst1.8       {q0}, [%0]!                  \n"
-    "bgt          1b                           \n"
-    "b            99f                          \n"
-
-    // Blend 25 / 75.
-  "25:                                         \n"
-    "vld1.8       {q0}, [%1]!                  \n"
-    "vld1.8       {q1}, [%2]!                  \n"
-    "subs         %3, %3, #16                  \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vst1.8       {q0}, [%0]!                  \n"
-    "bgt          25b                          \n"
-    "b            99f                          \n"
-
-    // Blend 50 / 50.
-  "50:                                         \n"
-    "vld1.8       {q0}, [%1]!                  \n"
-    "vld1.8       {q1}, [%2]!                  \n"
-    "subs         %3, %3, #16                  \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vst1.8       {q0}, [%0]!                  \n"
-    "bgt          50b                          \n"
-    "b            99f                          \n"
-
-    // Blend 75 / 25.
-  "75:                                         \n"
-    "vld1.8       {q1}, [%1]!                  \n"
-    "vld1.8       {q0}, [%2]!                  \n"
-    "subs         %3, %3, #16                  \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vrhadd.u8    q0, q1                       \n"
-    "vst1.8       {q0}, [%0]!                  \n"
-    "bgt          75b                          \n"
-    "b            99f                          \n"
-
-    // Blend 100 / 0 - Copy row unchanged.
-  "100:                                        \n"
-    "vld1.8       {q0}, [%1]!                  \n"
-    "subs         %3, %3, #16                  \n"
-    "vst1.8       {q0}, [%0]!                  \n"
-    "bgt          100b                         \n"
-
-  "99:                                         \n"
-    "vst1.8       {d1[7]}, [%0]                \n"
-  : "+r"(dst_ptr),          // %0
-    "+r"(src_ptr),          // %1
-    "+r"(src_stride),       // %2
-    "+r"(dst_width),        // %3
-    "+r"(source_y_fraction) // %4
-  :
-  : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
-  );
-}
-
-void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* dst, int dst_width) {
-  asm volatile (
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    // load even pixels into q0, odd into q1
-    "vld2.32    {q0, q1}, [%0]!                \n"
-    "vld2.32    {q2, q3}, [%0]!                \n"
-    "subs       %2, %2, #8                     \n"  // 8 processed per loop
-    "vst1.8     {q1}, [%1]!                    \n"  // store odd pixels
-    "vst1.8     {q3}, [%1]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(dst),              // %1
-    "+r"(dst_width)         // %2
-  :
-  : "memory", "cc", "q0", "q1", "q2", "q3"  // Clobber List
-  );
-}
-
-void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width) {
-  asm volatile (
-    // change the stride to row 2 pointer
-    "add        %1, %1, %0                     \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
-    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
-    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
-    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
-    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
-    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
-    "vpaddl.u8  q3, q3                         \n"  // A 16 bytes -> 8 shorts.
-    "vld4.8     {d16, d18, d20, d22}, [%1]!    \n"  // load 8 more ARGB pixels.
-    "vld4.8     {d17, d19, d21, d23}, [%1]!    \n"  // load last 8 ARGB pixels.
-    "vpadal.u8  q0, q8                         \n"  // B 16 bytes -> 8 shorts.
-    "vpadal.u8  q1, q9                         \n"  // G 16 bytes -> 8 shorts.
-    "vpadal.u8  q2, q10                        \n"  // R 16 bytes -> 8 shorts.
-    "vpadal.u8  q3, q11                        \n"  // A 16 bytes -> 8 shorts.
-    "vrshrn.u16 d0, q0, #2                     \n"  // downshift, round and pack
-    "vrshrn.u16 d1, q1, #2                     \n"
-    "vrshrn.u16 d2, q2, #2                     \n"
-    "vrshrn.u16 d3, q3, #2                     \n"
-    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"
-    "bgt        1b                             \n"
-  : "+r"(src_ptr),          // %0
-    "+r"(src_stride),       // %1
-    "+r"(dst),              // %2
-    "+r"(dst_width)         // %3
-  :
-  : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
-  );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb,  ptrdiff_t src_stride,
-                               int src_stepx, uint8* dst_argb, int dst_width) {
-  asm volatile (
-    "mov        r12, %3, lsl #2                \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.32    {d0[0]}, [%0], r12             \n"
-    "vld1.32    {d0[1]}, [%0], r12             \n"
-    "vld1.32    {d1[0]}, [%0], r12             \n"
-    "vld1.32    {d1[1]}, [%0], r12             \n"
-    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
-    "vst1.8     {q0}, [%1]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_argb),    // %0
-    "+r"(dst_argb),    // %1
-    "+r"(dst_width)    // %2
-  : "r"(src_stepx)     // %3
-  : "memory", "cc", "r12", "q0"
-  );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: src_argb 4 byte aligned.
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
-                                  int src_stepx,
-                                  uint8* dst_argb, int dst_width) {
-  asm volatile (
-    "mov        r12, %4, lsl #2                \n"
-    "add        %1, %1, %0                     \n"
-    ".p2align   2                              \n"
-  "1:                                          \n"
-    "vld1.8     {d0}, [%0], r12                \n"  // Read 4 2x2 blocks -> 2x1
-    "vld1.8     {d1}, [%1], r12                \n"
-    "vld1.8     {d2}, [%0], r12                \n"
-    "vld1.8     {d3}, [%1], r12                \n"
-    "vld1.8     {d4}, [%0], r12                \n"
-    "vld1.8     {d5}, [%1], r12                \n"
-    "vld1.8     {d6}, [%0], r12                \n"
-    "vld1.8     {d7}, [%1], r12                \n"
-    "vaddl.u8   q0, d0, d1                     \n"
-    "vaddl.u8   q1, d2, d3                     \n"
-    "vaddl.u8   q2, d4, d5                     \n"
-    "vaddl.u8   q3, d6, d7                     \n"
-    "vswp.8     d1, d2                         \n"  // ab_cd -> ac_bd
-    "vswp.8     d5, d6                         \n"  // ef_gh -> eg_fh
-    "vadd.u16   q0, q0, q1                     \n"  // (a+b)_(c+d)
-    "vadd.u16   q2, q2, q3                     \n"  // (e+f)_(g+h)
-    "vrshrn.u16 d0, q0, #2                     \n"  // first 2 pixels.
-    "vrshrn.u16 d1, q2, #2                     \n"  // next 2 pixels.
-    "subs       %3, %3, #4                     \n"  // 4 pixels per loop.
-    "vst1.8     {q0}, [%2]!                    \n"
-    "bgt        1b                             \n"
-  : "+r"(src_argb),    // %0
-    "+r"(src_stride),  // %1
-    "+r"(dst_argb),    // %2
-    "+r"(dst_width)    // %3
-  : "r"(src_stepx)     // %4
-  : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
-  );
-}
-
-#endif  // __ARM_NEON__
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc
deleted file mode 100644
index 352e667822..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc
+++ /dev/null
@@ -1,1315 +0,0 @@
-/*
- *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
-
-// Offsets for source bytes 0 to 9
-static uvec8 kShuf0 =
-  { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
-static uvec8 kShuf1 =
-  { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf2 =
-  { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 0 to 10
-static uvec8 kShuf01 =
-  { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
-
-// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
-static uvec8 kShuf11 =
-  { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf21 =
-  { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
-
-// Coefficients for source bytes 0 to 10
-static uvec8 kMadd01 =
-  { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
-
-// Coefficients for source bytes 10 to 21
-static uvec8 kMadd11 =
-  { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
-
-// Coefficients for source bytes 21 to 31
-static uvec8 kMadd21 =
-  { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
-
-// Coefficients for source bytes 21 to 31
-static vec16 kRound34 =
-  { 2, 2, 2, 2, 2, 2, 2, 2 };
-
-static uvec8 kShuf38a =
-  { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static uvec8 kShuf38b =
-  { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 0,1,2
-static uvec8 kShufAc =
-  { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 3,4,5
-static uvec8 kShufAc3 =
-  { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x3 and 2x3
-static uvec16 kScaleAc33 =
-  { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
-
-// Arrange first value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb0 =
-  { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
-
-// Arrange second value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb1 =
-  { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
-
-// Arrange third value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb2 =
-  { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x2 and 2x2
-static uvec16 kScaleAb2 =
-  { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
-
-// GCC versions of row functions are verbatim conversions from Visual C.
-// Generated using gcc disassembly on Visual C object file:
-// objdump -D yuvscaler.obj >yuvscaler.txt
-
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10, 0) ",%%xmm1  \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "movdqa    %%xmm1,%%xmm3                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "pand      %%xmm5,%%xmm2                   \n"
-    "pand      %%xmm5,%%xmm3                   \n"
-    "pavgw     %%xmm2,%%xmm0                   \n"
-    "pavgw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    MEMOPREG(movdqa,0x00,0,3,1,xmm2)           //  movdqa  (%0,%3,1),%%xmm2
-    BUNDLEALIGN
-    MEMOPREG(movdqa,0x10,0,3,1,xmm3)           //  movdqa  0x10(%0,%3,1),%%xmm3
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "movdqa    %%xmm1,%%xmm3                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "pand      %%xmm5,%%xmm2                   \n"
-    "pand      %%xmm5,%%xmm3                   \n"
-    "pavgw     %%xmm2,%%xmm0                   \n"
-    "pavgw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  : "r"((intptr_t)(src_stride))   // %3
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                  uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
-                                        ptrdiff_t src_stride,
-                                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "movdqa    %%xmm1,%%xmm3                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "pand      %%xmm5,%%xmm2                   \n"
-    "pand      %%xmm5,%%xmm3                   \n"
-    "pavgw     %%xmm2,%%xmm0                   \n"
-    "pavgw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
-                                     ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrlw     $0x8,%%xmm5                     \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    MEMOPREG(movdqu,0x00,0,3,1,xmm2)           //  movdqu  (%0,%3,1),%%xmm2
-    BUNDLEALIGN
-    MEMOPREG(movdqu,0x10,0,3,1,xmm3)           //  movdqu  0x10(%0,%3,1),%%xmm3
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "movdqa    %%xmm1,%%xmm3                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "pand      %%xmm5,%%xmm2                   \n"
-    "pand      %%xmm5,%%xmm3                   \n"
-    "pavgw     %%xmm2,%%xmm0                   \n"
-    "pavgw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "sub       $0x10,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  : "r"((intptr_t)(src_stride))   // %3
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-#endif
-  );
-}
-
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "pcmpeqb   %%xmm5,%%xmm5                   \n"
-    "psrld     $0x18,%%xmm5                    \n"
-    "pslld     $0x10,%%xmm5                    \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pand      %%xmm5,%%xmm0                   \n"
-    "pand      %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm5"
-#endif
-  );
-}
-
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width) {
-  intptr_t stridex3 = 0;
-  asm volatile (
-    "pcmpeqb   %%xmm7,%%xmm7                   \n"
-    "psrlw     $0x8,%%xmm7                     \n"
-    "lea       " MEMLEA4(0x00,4,4,2) ",%3      \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    MEMOPREG(movdqa,0x00,0,4,1,xmm2)           //  movdqa  (%0,%4,1),%%xmm2
-    BUNDLEALIGN
-    MEMOPREG(movdqa,0x10,0,4,1,xmm3)           //  movdqa  0x10(%0,%4,1),%%xmm3
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    MEMOPREG(movdqa,0x00,0,4,2,xmm2)           //  movdqa  (%0,%4,2),%%xmm2
-    BUNDLEALIGN
-    MEMOPREG(movdqa,0x10,0,4,2,xmm3)           //  movdqa  0x10(%0,%4,2),%%xmm3
-    MEMOPREG(movdqa,0x00,0,3,1,xmm4)           //  movdqa  (%0,%3,1),%%xmm4
-    MEMOPREG(movdqa,0x10,0,3,1,xmm5)           //  movdqa  0x10(%0,%3,1),%%xmm5
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm4,%%xmm2                   \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm5,%%xmm3                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "movdqa    %%xmm1,%%xmm3                   \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "pand      %%xmm7,%%xmm2                   \n"
-    "pand      %%xmm7,%%xmm3                   \n"
-    "pavgw     %%xmm2,%%xmm0                   \n"
-    "pavgw     %%xmm3,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "pand      %%xmm7,%%xmm2                   \n"
-    "pavgw     %%xmm2,%%xmm0                   \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x8,1) ",%1            \n"
-    "sub       $0x8,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),     // %0
-    "+r"(dst_ptr),     // %1
-    "+r"(dst_width),   // %2
-    "+r"(stridex3)     // %3
-  : "r"((intptr_t)(src_stride))    // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
-#endif
-  );
-}
-
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
-                          uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "movdqa    %0,%%xmm3                       \n"
-    "movdqa    %1,%%xmm4                       \n"
-    "movdqa    %2,%%xmm5                       \n"
-  :
-  : "m"(kShuf0),  // %0
-    "m"(kShuf1),  // %1
-    "m"(kShuf2)   // %2
-  );
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm2   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "movdqa    %%xmm2,%%xmm1                   \n"
-    "palignr   $0x8,%%xmm0,%%xmm1              \n"
-    "pshufb    %%xmm3,%%xmm0                   \n"
-    "pshufb    %%xmm4,%%xmm1                   \n"
-    "pshufb    %%xmm5,%%xmm2                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "movq      %%xmm1," MEMACCESS2(0x8,1) "    \n"
-    "movq      %%xmm2," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x18,1) ",%1           \n"
-    "sub       $0x18,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),   // %0
-    "+r"(dst_ptr),   // %1
-    "+r"(dst_width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "movdqa    %0,%%xmm2                       \n"  // kShuf01
-    "movdqa    %1,%%xmm3                       \n"  // kShuf11
-    "movdqa    %2,%%xmm4                       \n"  // kShuf21
-  :
-  : "m"(kShuf01),  // %0
-    "m"(kShuf11),  // %1
-    "m"(kShuf21)   // %2
-  );
-  asm volatile (
-    "movdqa    %0,%%xmm5                       \n"  // kMadd01
-    "movdqa    %1,%%xmm0                       \n"  // kMadd11
-    "movdqa    %2,%%xmm1                       \n"  // kRound34
-  :
-  : "m"(kMadd01),  // %0
-    "m"(kMadd11),  // %1
-    "m"(kRound34)  // %2
-  );
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
-    MEMOPREG(movdqa,0x00,0,3,1,xmm7)           //  movdqa  (%0,%3),%%xmm7
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "pshufb    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm5,%%xmm6                   \n"
-    "paddsw    %%xmm1,%%xmm6                   \n"
-    "psrlw     $0x2,%%xmm6                     \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "movq      %%xmm6," MEMACCESS(1) "         \n"
-    "movdqu    " MEMACCESS2(0x8,0) ",%%xmm6    \n"
-    MEMOPREG(movdqu,0x8,0,3,1,xmm7)            //  movdqu  0x8(%0,%3),%%xmm7
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "pshufb    %%xmm3,%%xmm6                   \n"
-    "pmaddubsw %%xmm0,%%xmm6                   \n"
-    "paddsw    %%xmm1,%%xmm6                   \n"
-    "psrlw     $0x2,%%xmm6                     \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "movq      %%xmm6," MEMACCESS2(0x8,1) "    \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqa,0x10,0,3,1,xmm7)           //  movdqa  0x10(%0,%3),%%xmm7
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "pshufb    %%xmm4,%%xmm6                   \n"
-    "pmaddubsw %4,%%xmm6                       \n"
-    "paddsw    %%xmm1,%%xmm6                   \n"
-    "psrlw     $0x2,%%xmm6                     \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "movq      %%xmm6," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x18,1) ",%1           \n"
-    "sub       $0x18,%2                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),   // %0
-    "+r"(dst_ptr),   // %1
-    "+r"(dst_width)  // %2
-  : "r"((intptr_t)(src_stride)),  // %3
-    "m"(kMadd21)     // %4
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "movdqa    %0,%%xmm2                       \n"  // kShuf01
-    "movdqa    %1,%%xmm3                       \n"  // kShuf11
-    "movdqa    %2,%%xmm4                       \n"  // kShuf21
-  :
-  : "m"(kShuf01),  // %0
-    "m"(kShuf11),  // %1
-    "m"(kShuf21)   // %2
-  );
-  asm volatile (
-    "movdqa    %0,%%xmm5                       \n"  // kMadd01
-    "movdqa    %1,%%xmm0                       \n"  // kMadd11
-    "movdqa    %2,%%xmm1                       \n"  // kRound34
-  :
-  : "m"(kMadd01),  // %0
-    "m"(kMadd11),  // %1
-    "m"(kRound34)  // %2
-  );
-
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
-    MEMOPREG(movdqa,0x00,0,3,1,xmm7)           //  movdqa  (%0,%3,1),%%xmm7
-    "pavgb     %%xmm6,%%xmm7                   \n"
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "pshufb    %%xmm2,%%xmm6                   \n"
-    "pmaddubsw %%xmm5,%%xmm6                   \n"
-    "paddsw    %%xmm1,%%xmm6                   \n"
-    "psrlw     $0x2,%%xmm6                     \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "movq      %%xmm6," MEMACCESS(1) "         \n"
-    "movdqu    " MEMACCESS2(0x8,0) ",%%xmm6    \n"
-    MEMOPREG(movdqu,0x8,0,3,1,xmm7)            //  movdqu  0x8(%0,%3,1),%%xmm7
-    "pavgb     %%xmm6,%%xmm7                   \n"
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "pshufb    %%xmm3,%%xmm6                   \n"
-    "pmaddubsw %%xmm0,%%xmm6                   \n"
-    "paddsw    %%xmm1,%%xmm6                   \n"
-    "psrlw     $0x2,%%xmm6                     \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "movq      %%xmm6," MEMACCESS2(0x8,1) "    \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
-    MEMOPREG(movdqa,0x10,0,3,1,xmm7)           //  movdqa  0x10(%0,%3,1),%%xmm7
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm6,%%xmm7                   \n"
-    "pavgb     %%xmm7,%%xmm6                   \n"
-    "pshufb    %%xmm4,%%xmm6                   \n"
-    "pmaddubsw %4,%%xmm6                       \n"
-    "paddsw    %%xmm1,%%xmm6                   \n"
-    "psrlw     $0x2,%%xmm6                     \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "movq      %%xmm6," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x18,1) ",%1           \n"
-    "sub       $0x18,%2                        \n"
-    "jg        1b                              \n"
-    : "+r"(src_ptr),   // %0
-      "+r"(dst_ptr),   // %1
-      "+r"(dst_width)  // %2
-    : "r"((intptr_t)(src_stride)),  // %3
-      "m"(kMadd21)     // %4
-    : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
-                          uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "movdqa    %3,%%xmm4                       \n"
-    "movdqa    %4,%%xmm5                       \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pshufb    %%xmm4,%%xmm0                   \n"
-    "pshufb    %%xmm5,%%xmm1                   \n"
-    "paddusb   %%xmm1,%%xmm0                   \n"
-    "movq      %%xmm0," MEMACCESS(1) "         \n"
-    "movhlps   %%xmm0,%%xmm1                   \n"
-    "movd      %%xmm1," MEMACCESS2(0x8,1) "    \n"
-    "lea       " MEMLEA(0xc,1) ",%1            \n"
-    "sub       $0xc,%2                         \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),   // %0
-    "+r"(dst_ptr),   // %1
-    "+r"(dst_width)  // %2
-  : "m"(kShuf38a),   // %3
-    "m"(kShuf38b)    // %4
-  : "memory", "cc"
-#if defined(__SSE2__)
-      , "xmm0", "xmm1", "xmm4", "xmm5"
-#endif
-  );
-}
-
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "movdqa    %0,%%xmm2                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm4                       \n"
-    "movdqa    %3,%%xmm5                       \n"
-  :
-  : "m"(kShufAb0),   // %0
-    "m"(kShufAb1),   // %1
-    "m"(kShufAb2),   // %2
-    "m"(kScaleAb2)   // %3
-  );
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(pavgb,0x00,0,3,1,xmm0)            //  pavgb   (%0,%3,1),%%xmm0
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "pshufb    %%xmm2,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm6                   \n"
-    "pshufb    %%xmm3,%%xmm6                   \n"
-    "paddusw   %%xmm6,%%xmm1                   \n"
-    "pshufb    %%xmm4,%%xmm0                   \n"
-    "paddusw   %%xmm0,%%xmm1                   \n"
-    "pmulhuw   %%xmm5,%%xmm1                   \n"
-    "packuswb  %%xmm1,%%xmm1                   \n"
-    "sub       $0x6,%2                         \n"
-    "movd      %%xmm1," MEMACCESS(1) "         \n"
-    "psrlq     $0x10,%%xmm1                    \n"
-    "movd      %%xmm1," MEMACCESS2(0x2,1) "    \n"
-    "lea       " MEMLEA(0x6,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),     // %0
-    "+r"(dst_ptr),     // %1
-    "+r"(dst_width)    // %2
-  : "r"((intptr_t)(src_stride))  // %3
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  asm volatile (
-    "movdqa    %0,%%xmm2                       \n"
-    "movdqa    %1,%%xmm3                       \n"
-    "movdqa    %2,%%xmm4                       \n"
-    "pxor      %%xmm5,%%xmm5                   \n"
-  :
-  : "m"(kShufAc),    // %0
-    "m"(kShufAc3),   // %1
-    "m"(kScaleAc33)  // %2
-  );
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(movdqa,0x00,0,3,1,xmm6)           //  movdqa  (%0,%3,1),%%xmm6
-    "movhlps   %%xmm0,%%xmm1                   \n"
-    "movhlps   %%xmm6,%%xmm7                   \n"
-    "punpcklbw %%xmm5,%%xmm0                   \n"
-    "punpcklbw %%xmm5,%%xmm1                   \n"
-    "punpcklbw %%xmm5,%%xmm6                   \n"
-    "punpcklbw %%xmm5,%%xmm7                   \n"
-    "paddusw   %%xmm6,%%xmm0                   \n"
-    "paddusw   %%xmm7,%%xmm1                   \n"
-    MEMOPREG(movdqa,0x00,0,3,2,xmm6)           //  movdqa  (%0,%3,2),%%xmm6
-    "lea       " MEMLEA(0x10,0) ",%0           \n"
-    "movhlps   %%xmm6,%%xmm7                   \n"
-    "punpcklbw %%xmm5,%%xmm6                   \n"
-    "punpcklbw %%xmm5,%%xmm7                   \n"
-    "paddusw   %%xmm6,%%xmm0                   \n"
-    "paddusw   %%xmm7,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm6                   \n"
-    "psrldq    $0x2,%%xmm0                     \n"
-    "paddusw   %%xmm0,%%xmm6                   \n"
-    "psrldq    $0x2,%%xmm0                     \n"
-    "paddusw   %%xmm0,%%xmm6                   \n"
-    "pshufb    %%xmm2,%%xmm6                   \n"
-    "movdqa    %%xmm1,%%xmm7                   \n"
-    "psrldq    $0x2,%%xmm1                     \n"
-    "paddusw   %%xmm1,%%xmm7                   \n"
-    "psrldq    $0x2,%%xmm1                     \n"
-    "paddusw   %%xmm1,%%xmm7                   \n"
-    "pshufb    %%xmm3,%%xmm7                   \n"
-    "paddusw   %%xmm7,%%xmm6                   \n"
-    "pmulhuw   %%xmm4,%%xmm6                   \n"
-    "packuswb  %%xmm6,%%xmm6                   \n"
-    "sub       $0x6,%2                         \n"
-    "movd      %%xmm6," MEMACCESS(1) "         \n"
-    "psrlq     $0x10,%%xmm6                    \n"
-    "movd      %%xmm6," MEMACCESS2(0x2,1) "    \n"
-    "lea       " MEMLEA(0x6,1) ",%1            \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),    // %0
-    "+r"(dst_ptr),    // %1
-    "+r"(dst_width)   // %2
-  : "r"((intptr_t)(src_stride))   // %3
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-#endif
-  );
-}
-
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                       uint16* dst_ptr, int src_width, int src_height) {
-  int tmp_height = 0;
-  intptr_t tmp_src = 0;
-  asm volatile (
-    "pxor      %%xmm4,%%xmm4                   \n"
-    "sub       $0x1,%5                         \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "mov       %0,%3                           \n"
-    "add       %6,%0                           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm4,%%xmm0                   \n"
-    "punpckhbw %%xmm4,%%xmm1                   \n"
-    "mov       %5,%2                           \n"
-    "test      %2,%2                           \n"
-    "je        3f                              \n"
-
-    LABELALIGN
-  "2:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
-    "add       %6,%0                           \n"
-    "movdqa    %%xmm2,%%xmm3                   \n"
-    "punpcklbw %%xmm4,%%xmm2                   \n"
-    "punpckhbw %%xmm4,%%xmm3                   \n"
-    "paddusw   %%xmm2,%%xmm0                   \n"
-    "paddusw   %%xmm3,%%xmm1                   \n"
-    "sub       $0x1,%2                         \n"
-    "jg        2b                              \n"
-
-    LABELALIGN
-  "3:                                          \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
-    "lea       " MEMLEA(0x10,3) ",%0           \n"
-    "lea       " MEMLEA(0x20,1) ",%1           \n"
-    "sub       $0x10,%4                        \n"
-    "jg        1b                              \n"
-  : "+r"(src_ptr),     // %0
-    "+r"(dst_ptr),     // %1
-    "+r"(tmp_height),  // %2
-    "+r"(tmp_src),     // %3
-    "+r"(src_width),   // %4
-    "+rm"(src_height)  // %5
-  : "rm"((intptr_t)(src_stride))  // %6
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
-  );
-}
-
-// Bilinear column filtering. SSSE3 version.
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                           int dst_width, int x, int dx) {
-  intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
-  asm volatile (
-    "movd      %6,%%xmm2                       \n"
-    "movd      %7,%%xmm3                       \n"
-    "movl      $0x04040000,%k2                 \n"
-    "movd      %k2,%%xmm5                      \n"
-    "pcmpeqb   %%xmm6,%%xmm6                   \n"
-    "psrlw     $0x9,%%xmm6                     \n"
-    "pextrw    $0x1,%%xmm2,%k3                 \n"
-    "subl      $0x2,%5                         \n"
-    "jl        29f                             \n"
-    "movdqa    %%xmm2,%%xmm0                   \n"
-    "paddd     %%xmm3,%%xmm0                   \n"
-    "punpckldq %%xmm0,%%xmm2                   \n"
-    "punpckldq %%xmm3,%%xmm3                   \n"
-    "paddd     %%xmm3,%%xmm3                   \n"
-    "pextrw    $0x3,%%xmm2,%k4                 \n"
-
-    LABELALIGN
-  "2:                                          \n"
-    "movdqa    %%xmm2,%%xmm1                   \n"
-    "paddd     %%xmm3,%%xmm2                   \n"
-    MEMOPARG(movzwl,0x00,1,3,1,k2)             //  movzwl  (%1,%3,1),%k2
-    "movd      %k2,%%xmm0                      \n"
-    "psrlw     $0x9,%%xmm1                     \n"
-    BUNDLEALIGN
-    MEMOPARG(movzwl,0x00,1,4,1,k2)             //  movzwl  (%1,%4,1),%k2
-    "movd      %k2,%%xmm4                      \n"
-    "pshufb    %%xmm5,%%xmm1                   \n"
-    "punpcklwd %%xmm4,%%xmm0                   \n"
-    "pxor      %%xmm6,%%xmm1                   \n"
-    "pmaddubsw %%xmm1,%%xmm0                   \n"
-    "pextrw    $0x1,%%xmm2,%k3                 \n"
-    "pextrw    $0x3,%%xmm2,%k4                 \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movd      %%xmm0,%k2                      \n"
-    "mov       %w2," MEMACCESS(0) "            \n"
-    "lea       " MEMLEA(0x2,0) ",%0            \n"
-    "sub       $0x2,%5                         \n"
-    "jge       2b                              \n"
-
-    LABELALIGN
-  "29:                                         \n"
-    "addl      $0x1,%5                         \n"
-    "jl        99f                             \n"
-    MEMOPARG(movzwl,0x00,1,3,1,k2)             //  movzwl  (%1,%3,1),%k2
-    "movd      %k2,%%xmm0                      \n"
-    "psrlw     $0x9,%%xmm2                     \n"
-    "pshufb    %%xmm5,%%xmm2                   \n"
-    "pxor      %%xmm6,%%xmm2                   \n"
-    "pmaddubsw %%xmm2,%%xmm0                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movd      %%xmm0,%k2                      \n"
-    "mov       %b2," MEMACCESS(0) "            \n"
-  "99:                                         \n"
-  : "+r"(dst_ptr),     // %0
-    "+r"(src_ptr),     // %1
-    "+a"(temp_pixel),  // %2
-    "+r"(x0),          // %3
-    "+r"(x1),          // %4
-    "+rm"(dst_width)   // %5
-  : "rm"(x),           // %6
-    "rm"(dx)           // %7
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                       int dst_width, int x, int dx) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "punpckhbw %%xmm1,%%xmm1                   \n"
-    "sub       $0x20,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "jg        1b                              \n"
-
-  : "+r"(dst_ptr),     // %0
-    "+r"(src_ptr),     // %1
-    "+r"(dst_width)    // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
-                            ptrdiff_t src_stride,
-                            uint8* dst_argb, int dst_width) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "shufps    $0xdd,%%xmm1,%%xmm0             \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(dst_width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride,
-                                  uint8* dst_argb, int dst_width) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_argb),  // %1
-    "+r"(dst_width)  // %2
-  :
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               uint8* dst_argb, int dst_width) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
-    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
-    BUNDLEALIGN
-    MEMOPREG(movdqa,0x00,0,3,1,xmm2)           //  movdqa   (%0,%3,1),%%xmm2
-    MEMOPREG(movdqa,0x10,0,3,1,xmm3)           //  movdqa   0x10(%0,%3,1),%%xmm3
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),   // %0
-    "+r"(dst_argb),   // %1
-    "+r"(dst_width)   // %2
-  : "r"((intptr_t)(src_stride))   // %3
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
-  );
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
-                               int src_stepx,
-                               uint8* dst_argb, int dst_width) {
-  intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
-  intptr_t src_stepx_x12 = 0;
-  asm volatile (
-    "lea       " MEMLEA3(0x00,1,4) ",%1        \n"
-    "lea       " MEMLEA4(0x00,1,1,2) ",%4      \n"
-    LABELALIGN
-  "1:                                          \n"
-    "movd      " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(movd,0x00,0,1,1,xmm1)             //  movd      (%0,%1,1),%%xmm1
-    "punpckldq %%xmm1,%%xmm0                   \n"
-    BUNDLEALIGN
-    MEMOPREG(movd,0x00,0,1,2,xmm2)             //  movd      (%0,%1,2),%%xmm2
-    MEMOPREG(movd,0x00,0,4,1,xmm3)             //  movd      (%0,%4,1),%%xmm3
-    "lea       " MEMLEA4(0x00,0,1,4) ",%0      \n"
-    "punpckldq %%xmm3,%%xmm2                   \n"
-    "punpcklqdq %%xmm2,%%xmm0                  \n"
-    "sub       $0x4,%3                         \n"
-    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),      // %0
-    "+r"(src_stepx_x4),  // %1
-    "+r"(dst_argb),      // %2
-    "+r"(dst_width),     // %3
-    "+r"(src_stepx_x12)  // %4
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
-  );
-}
-
-// Blends four 2x2 to 4x1.
-// Alignment requirement: dst_argb 16 byte aligned.
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride, int src_stepx,
-                                  uint8* dst_argb, int dst_width) {
-  intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
-  intptr_t src_stepx_x12 = 0;
-  intptr_t row1 = (intptr_t)(src_stride);
-  asm volatile (
-    "lea       " MEMLEA3(0x00,1,4) ",%1        \n"
-    "lea       " MEMLEA4(0x00,1,1,2) ",%4      \n"
-    "lea       " MEMLEA4(0x00,0,5,1) ",%5      \n"
-
-    LABELALIGN
-  "1:                                          \n"
-    "movq      " MEMACCESS(0) ",%%xmm0         \n"
-    MEMOPREG(movhps,0x00,0,1,1,xmm0)           //  movhps    (%0,%1,1),%%xmm0
-    MEMOPREG(movq,0x00,0,1,2,xmm1)             //  movq      (%0,%1,2),%%xmm1
-    BUNDLEALIGN
-    MEMOPREG(movhps,0x00,0,4,1,xmm1)           //  movhps    (%0,%4,1),%%xmm1
-    "lea       " MEMLEA4(0x00,0,1,4) ",%0      \n"
-    "movq      " MEMACCESS(5) ",%%xmm2         \n"
-    BUNDLEALIGN
-    MEMOPREG(movhps,0x00,5,1,1,xmm2)           //  movhps    (%5,%1,1),%%xmm2
-    MEMOPREG(movq,0x00,5,1,2,xmm3)             //  movq      (%5,%1,2),%%xmm3
-    MEMOPREG(movhps,0x00,5,4,1,xmm3)           //  movhps    (%5,%4,1),%%xmm3
-    "lea       " MEMLEA4(0x00,5,1,4) ",%5      \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "pavgb     %%xmm3,%%xmm1                   \n"
-    "movdqa    %%xmm0,%%xmm2                   \n"
-    "shufps    $0x88,%%xmm1,%%xmm0             \n"
-    "shufps    $0xdd,%%xmm1,%%xmm2             \n"
-    "pavgb     %%xmm2,%%xmm0                   \n"
-    "sub       $0x4,%3                         \n"
-    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),       // %0
-    "+r"(src_stepx_x4),   // %1
-    "+r"(dst_argb),       // %2
-    "+rm"(dst_width),     // %3
-    "+r"(src_stepx_x12),  // %4
-    "+r"(row1)            // %5
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3"
-#endif
-  );
-}
-
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
-                        int dst_width, int x, int dx) {
-  intptr_t x0 = 0, x1 = 0;
-  asm volatile (
-    "movd      %5,%%xmm2                       \n"
-    "movd      %6,%%xmm3                       \n"
-    "pshufd    $0x0,%%xmm2,%%xmm2              \n"
-    "pshufd    $0x11,%%xmm3,%%xmm0             \n"
-    "paddd     %%xmm0,%%xmm2                   \n"
-    "paddd     %%xmm3,%%xmm3                   \n"
-    "pshufd    $0x5,%%xmm3,%%xmm0              \n"
-    "paddd     %%xmm0,%%xmm2                   \n"
-    "paddd     %%xmm3,%%xmm3                   \n"
-    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
-    "pextrw    $0x1,%%xmm2,%k0                 \n"
-    "pextrw    $0x3,%%xmm2,%k1                 \n"
-    "cmp       $0x0,%4                         \n"
-    "jl        99f                             \n"
-    "sub       $0x4,%4                         \n"
-    "jl        49f                             \n"
-
-    LABELALIGN
-  "40:                                         \n"
-    MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
-    MEMOPREG(movd,0x00,3,1,4,xmm1)             //  movd      (%3,%1,4),%%xmm1
-    "pextrw    $0x5,%%xmm2,%k0                 \n"
-    "pextrw    $0x7,%%xmm2,%k1                 \n"
-    "paddd     %%xmm3,%%xmm2                   \n"
-    "punpckldq %%xmm1,%%xmm0                   \n"
-    MEMOPREG(movd,0x00,3,0,4,xmm1)             //  movd      (%3,%0,4),%%xmm1
-    MEMOPREG(movd,0x00,3,1,4,xmm4)             //  movd      (%3,%1,4),%%xmm4
-    "pextrw    $0x1,%%xmm2,%k0                 \n"
-    "pextrw    $0x3,%%xmm2,%k1                 \n"
-    "punpckldq %%xmm4,%%xmm1                   \n"
-    "punpcklqdq %%xmm1,%%xmm0                  \n"
-    "sub       $0x4,%4                         \n"
-    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x10,2) ",%2           \n"
-    "jge       40b                             \n"
-
-  "49:                                         \n"
-    "test      $0x2,%4                         \n"
-    "je        29f                             \n"
-    BUNDLEALIGN
-    MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
-    MEMOPREG(movd,0x00,3,1,4,xmm1)             //  movd      (%3,%1,4),%%xmm1
-    "pextrw    $0x5,%%xmm2,%k0                 \n"
-    "punpckldq %%xmm1,%%xmm0                   \n"
-    "movq      %%xmm0," MEMACCESS(2) "         \n"
-    "lea       " MEMLEA(0x8,2) ",%2            \n"
-  "29:                                         \n"
-    "test      $0x1,%4                         \n"
-    "je        99f                             \n"
-    MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
-    "movd      %%xmm0," MEMACCESS(2) "         \n"
-  "99:                                         \n"
-  : "+a"(x0),          // %0
-    "+d"(x1),          // %1
-    "+r"(dst_argb),    // %2
-    "+r"(src_argb),    // %3
-    "+r"(dst_width)    // %4
-  : "rm"(x),           // %5
-    "rm"(dx)           // %6
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
-#endif
-  );
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
-                           int dst_width, int x, int dx) {
-  asm volatile (
-    LABELALIGN
-  "1:                                          \n"
-    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
-    "lea       " MEMLEA(0x10,1) ",%1           \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpckldq %%xmm0,%%xmm0                   \n"
-    "punpckhdq %%xmm1,%%xmm1                   \n"
-    "sub       $0x8,%2                         \n"
-    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
-    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
-    "lea       " MEMLEA(0x20,0) ",%0           \n"
-    "jg        1b                              \n"
-
-  : "+r"(dst_argb),    // %0
-    "+r"(src_argb),    // %1
-    "+r"(dst_width)    // %2
-  :
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1"
-#endif
-  );
-}
-
-// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
-static uvec8 kShuffleColARGB = {
-  0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u,  // bbggrraa 1st pixel
-  8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u  // bbggrraa 2nd pixel
-};
-
-// Shuffle table for duplicating 2 fractions into 8 bytes each
-static uvec8 kShuffleFractions = {
-  0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
-};
-
-// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
-                               int dst_width, int x, int dx) {
-  intptr_t x0 = 0, x1 = 0;
-  asm volatile (
-    "movdqa    %0,%%xmm4                       \n"
-    "movdqa    %1,%%xmm5                       \n"
-  :
-  : "m"(kShuffleColARGB),  // %0
-    "m"(kShuffleFractions)  // %1
-  );
-
-  asm volatile (
-    "movd      %5,%%xmm2                       \n"
-    "movd      %6,%%xmm3                       \n"
-    "pcmpeqb   %%xmm6,%%xmm6                   \n"
-    "psrlw     $0x9,%%xmm6                     \n"
-    "pextrw    $0x1,%%xmm2,%k3                 \n"
-    "sub       $0x2,%2                         \n"
-    "jl        29f                             \n"
-    "movdqa    %%xmm2,%%xmm0                   \n"
-    "paddd     %%xmm3,%%xmm0                   \n"
-    "punpckldq %%xmm0,%%xmm2                   \n"
-    "punpckldq %%xmm3,%%xmm3                   \n"
-    "paddd     %%xmm3,%%xmm3                   \n"
-    "pextrw    $0x3,%%xmm2,%k4                 \n"
-
-    LABELALIGN
-  "2:                                          \n"
-    "movdqa    %%xmm2,%%xmm1                   \n"
-    "paddd     %%xmm3,%%xmm2                   \n"
-    MEMOPREG(movq,0x00,1,3,4,xmm0)             //  movq      (%1,%3,4),%%xmm0
-    "psrlw     $0x9,%%xmm1                     \n"
-    BUNDLEALIGN
-    MEMOPREG(movhps,0x00,1,4,4,xmm0)           //  movhps    (%1,%4,4),%%xmm0
-    "pshufb    %%xmm5,%%xmm1                   \n"
-    "pshufb    %%xmm4,%%xmm0                   \n"
-    "pxor      %%xmm6,%%xmm1                   \n"
-    "pmaddubsw %%xmm1,%%xmm0                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "pextrw    $0x1,%%xmm2,%k3                 \n"
-    "pextrw    $0x3,%%xmm2,%k4                 \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movq      %%xmm0," MEMACCESS(0) "         \n"
-    "lea       " MEMLEA(0x8,0) ",%0            \n"
-    "sub       $0x2,%2                         \n"
-    "jge       2b                              \n"
-
-    LABELALIGN
-  "29:                                         \n"
-    "add       $0x1,%2                         \n"
-    "jl        99f                             \n"
-    "psrlw     $0x9,%%xmm2                     \n"
-    BUNDLEALIGN
-    MEMOPREG(movq,0x00,1,3,4,xmm0)             //  movq      (%1,%3,4),%%xmm0
-    "pshufb    %%xmm5,%%xmm2                   \n"
-    "pshufb    %%xmm4,%%xmm0                   \n"
-    "pxor      %%xmm6,%%xmm2                   \n"
-    "pmaddubsw %%xmm2,%%xmm0                   \n"
-    "psrlw     $0x7,%%xmm0                     \n"
-    "packuswb  %%xmm0,%%xmm0                   \n"
-    "movd      %%xmm0," MEMACCESS(0) "         \n"
-
-    LABELALIGN
-  "99:                                         \n"
-  : "+r"(dst_argb),    // %0
-    "+r"(src_argb),    // %1
-    "+rm"(dst_width),  // %2
-    "+r"(x0),          // %3
-    "+r"(x1)           // %4
-  : "rm"(x),           // %5
-    "rm"(dx)           // %6
-  : "memory", "cc"
-#if defined(__native_client__) && defined(__x86_64__)
-    , "r14"
-#endif
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
-#endif
-  );
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-int FixedDiv_X86(int num, int div) {
-  asm volatile (
-    "cdq                                       \n"
-    "shld      $0x10,%%eax,%%edx               \n"
-    "shl       $0x10,%%eax                     \n"
-    "idiv      %1                              \n"
-    "mov       %0, %%eax                       \n"
-    : "+a"(num)  // %0
-    : "c"(div)   // %1
-    : "memory", "cc", "edx"
-  );
-  return num;
-}
-
-// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
-int FixedDiv1_X86(int num, int div) {
-  asm volatile (
-    "cdq                                       \n"
-    "shld      $0x10,%%eax,%%edx               \n"
-    "shl       $0x10,%%eax                     \n"
-    "sub       $0x10001,%%eax                  \n"
-    "sbb       $0x0,%%edx                      \n"
-    "sub       $0x1,%1                         \n"
-    "idiv      %1                              \n"
-    "mov       %0, %%eax                       \n"
-    : "+a"(num)  // %0
-    : "c"(div)   // %1
-    : "memory", "cc", "edx"
-  );
-  return num;
-}
-
-#endif  // defined(__x86_64__) || defined(__i386__)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc
deleted file mode 100644
index 840b9738da..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc
+++ /dev/null
@@ -1,1320 +0,0 @@
-/*
- *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-// Offsets for source bytes 0 to 9
-static uvec8 kShuf0 =
-  { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
-static uvec8 kShuf1 =
-  { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf2 =
-  { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Offsets for source bytes 0 to 10
-static uvec8 kShuf01 =
-  { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
-
-// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
-static uvec8 kShuf11 =
-  { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
-
-// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
-static uvec8 kShuf21 =
-  { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
-
-// Coefficients for source bytes 0 to 10
-static uvec8 kMadd01 =
-  { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
-
-// Coefficients for source bytes 10 to 21
-static uvec8 kMadd11 =
-  { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
-
-// Coefficients for source bytes 21 to 31
-static uvec8 kMadd21 =
-  { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
-
-// Coefficients for source bytes 21 to 31
-static vec16 kRound34 =
-  { 2, 2, 2, 2, 2, 2, 2, 2 };
-
-static uvec8 kShuf38a =
-  { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-static uvec8 kShuf38b =
-  { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 0,1,2
-static uvec8 kShufAc =
-  { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
-
-// Arrange words 0,3,6 into 3,4,5
-static uvec8 kShufAc3 =
-  { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x3 and 2x3
-static uvec16 kScaleAc33 =
-  { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
-
-// Arrange first value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb0 =
-  { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
-
-// Arrange second value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb1 =
-  { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
-
-// Arrange third value for pixels 0,1,2,3,4,5
-static uvec8 kShufAb2 =
-  { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
-
-// Scaling values for boxes of 3x2 and 2x2
-static uvec16 kScaleAb2 =
-  { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
-
-// Reads 32 pixels, throws half away and writes 16 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_ptr
-                                     // src_stride ignored
-    mov        edx, [esp + 12]       // dst_ptr
-    mov        ecx, [esp + 16]       // dst_width
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    psrlw      xmm0, 8               // isolate odd pixels.
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 32x1 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst_ptr, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_ptr
-                                     // src_stride
-    mov        edx, [esp + 12]       // dst_ptr
-    mov        ecx, [esp + 16]       // dst_width
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-
-    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
-    psrlw      xmm0, 8
-    movdqa     xmm3, xmm1
-    psrlw      xmm1, 8
-    pand       xmm2, xmm5
-    pand       xmm3, xmm5
-    pavgw      xmm0, xmm2
-    pavgw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 32x2 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]    // src_ptr
-    mov        esi, [esp + 4 + 8]    // src_stride
-    mov        edx, [esp + 4 + 12]   // dst_ptr
-    mov        ecx, [esp + 4 + 16]   // dst_width
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + esi]
-    movdqa     xmm3, [eax + esi + 16]
-    lea        eax,  [eax + 32]
-    pavgb      xmm0, xmm2            // average rows
-    pavgb      xmm1, xmm3
-
-    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
-    psrlw      xmm0, 8
-    movdqa     xmm3, xmm1
-    psrlw      xmm1, 8
-    pand       xmm2, xmm5
-    pand       xmm3, xmm5
-    pavgw      xmm0, xmm2
-    pavgw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-
-    sub        ecx, 16
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    pop        esi
-    ret
-  }
-}
-
-// Reads 32 pixels, throws half away and writes 16 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
-                                  ptrdiff_t src_stride,
-                                  uint8* dst_ptr, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_ptr
-                                     // src_stride ignored
-    mov        edx, [esp + 12]       // dst_ptr
-    mov        ecx, [esp + 16]       // dst_width
-
-    align      4
-  wloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    psrlw      xmm0, 8               // isolate odd pixels.
-    psrlw      xmm1, 8
-    packuswb   xmm0, xmm1
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 32x1 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
-                                        ptrdiff_t src_stride,
-                                        uint8* dst_ptr, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_ptr
-                                     // src_stride
-    mov        edx, [esp + 12]       // dst_ptr
-    mov        ecx, [esp + 16]       // dst_width
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-
-    align      4
-  wloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-
-    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
-    psrlw      xmm0, 8
-    movdqa     xmm3, xmm1
-    psrlw      xmm1, 8
-    pand       xmm2, xmm5
-    pand       xmm3, xmm5
-    pavgw      xmm0, xmm2
-    pavgw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 32x2 rectangle to 16x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
-                                     ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]    // src_ptr
-    mov        esi, [esp + 4 + 8]    // src_stride
-    mov        edx, [esp + 4 + 12]   // dst_ptr
-    mov        ecx, [esp + 4 + 16]   // dst_width
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff00ff
-    psrlw      xmm5, 8
-
-    align      4
-  wloop:
-    movdqu     xmm0, [eax]
-    movdqu     xmm1, [eax + 16]
-    movdqu     xmm2, [eax + esi]
-    movdqu     xmm3, [eax + esi + 16]
-    lea        eax,  [eax + 32]
-    pavgb      xmm0, xmm2            // average rows
-    pavgb      xmm1, xmm3
-
-    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
-    psrlw      xmm0, 8
-    movdqa     xmm3, xmm1
-    psrlw      xmm1, 8
-    pand       xmm2, xmm5
-    pand       xmm3, xmm5
-    pavgw      xmm0, xmm2
-    pavgw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-
-    sub        ecx, 16
-    movdqu     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    pop        esi
-    ret
-  }
-}
-
-// Point samples 32 pixels to 8 pixels.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_ptr
-                                     // src_stride ignored
-    mov        edx, [esp + 12]       // dst_ptr
-    mov        ecx, [esp + 16]       // dst_width
-    pcmpeqb    xmm5, xmm5            // generate mask 0x00ff0000
-    psrld      xmm5, 24
-    pslld      xmm5, 16
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    pand       xmm0, xmm5
-    pand       xmm1, xmm5
-    packuswb   xmm0, xmm1
-    psrlw      xmm0, 8
-    packuswb   xmm0, xmm0
-    sub        ecx, 8
-    movq       qword ptr [edx], xmm0
-    lea        edx, [edx + 8]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 32x4 rectangle to 8x1.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width) {
-  __asm {
-    push       esi
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_ptr
-    mov        esi, [esp + 8 + 8]    // src_stride
-    mov        edx, [esp + 8 + 12]   // dst_ptr
-    mov        ecx, [esp + 8 + 16]   // dst_width
-    lea        edi, [esi + esi * 2]  // src_stride * 3
-    pcmpeqb    xmm7, xmm7            // generate mask 0x00ff00ff
-    psrlw      xmm7, 8
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + esi]
-    movdqa     xmm3, [eax + esi + 16]
-    pavgb      xmm0, xmm2            // average rows
-    pavgb      xmm1, xmm3
-    movdqa     xmm2, [eax + esi * 2]
-    movdqa     xmm3, [eax + esi * 2 + 16]
-    movdqa     xmm4, [eax + edi]
-    movdqa     xmm5, [eax + edi + 16]
-    lea        eax, [eax + 32]
-    pavgb      xmm2, xmm4
-    pavgb      xmm3, xmm5
-    pavgb      xmm0, xmm2
-    pavgb      xmm1, xmm3
-
-    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
-    psrlw      xmm0, 8
-    movdqa     xmm3, xmm1
-    psrlw      xmm1, 8
-    pand       xmm2, xmm7
-    pand       xmm3, xmm7
-    pavgw      xmm0, xmm2
-    pavgw      xmm1, xmm3
-    packuswb   xmm0, xmm1
-
-    movdqa     xmm2, xmm0            // average columns (16 to 8 pixels)
-    psrlw      xmm0, 8
-    pand       xmm2, xmm7
-    pavgw      xmm0, xmm2
-    packuswb   xmm0, xmm0
-
-    sub        ecx, 8
-    movq       qword ptr [edx], xmm0
-    lea        edx, [edx + 8]
-    jg         wloop
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// Point samples 32 pixels to 24 pixels.
-// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
-// Then shuffled to do the scaling.
-
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
-                          uint8* dst_ptr, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_ptr
-                                     // src_stride ignored
-    mov        edx, [esp + 12]       // dst_ptr
-    mov        ecx, [esp + 16]       // dst_width
-    movdqa     xmm3, kShuf0
-    movdqa     xmm4, kShuf1
-    movdqa     xmm5, kShuf2
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    movdqa     xmm2, xmm1
-    palignr    xmm1, xmm0, 8
-    pshufb     xmm0, xmm3
-    pshufb     xmm1, xmm4
-    pshufb     xmm2, xmm5
-    movq       qword ptr [edx], xmm0
-    movq       qword ptr [edx + 8], xmm1
-    movq       qword ptr [edx + 16], xmm2
-    lea        edx, [edx + 24]
-    sub        ecx, 24
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 32x2 rectangle to 24x1
-// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
-// Then shuffled to do the scaling.
-
-// Register usage:
-// xmm0 src_row 0
-// xmm1 src_row 1
-// xmm2 shuf 0
-// xmm3 shuf 1
-// xmm4 shuf 2
-// xmm5 madd 0
-// xmm6 madd 1
-// xmm7 kRound34
-
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]    // src_ptr
-    mov        esi, [esp + 4 + 8]    // src_stride
-    mov        edx, [esp + 4 + 12]   // dst_ptr
-    mov        ecx, [esp + 4 + 16]   // dst_width
-    movdqa     xmm2, kShuf01
-    movdqa     xmm3, kShuf11
-    movdqa     xmm4, kShuf21
-    movdqa     xmm5, kMadd01
-    movdqa     xmm6, kMadd11
-    movdqa     xmm7, kRound34
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]           // pixels 0..7
-    movdqa     xmm1, [eax + esi]
-    pavgb      xmm0, xmm1
-    pshufb     xmm0, xmm2
-    pmaddubsw  xmm0, xmm5
-    paddsw     xmm0, xmm7
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    movq       qword ptr [edx], xmm0
-    movdqu     xmm0, [eax + 8]       // pixels 8..15
-    movdqu     xmm1, [eax + esi + 8]
-    pavgb      xmm0, xmm1
-    pshufb     xmm0, xmm3
-    pmaddubsw  xmm0, xmm6
-    paddsw     xmm0, xmm7
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    movq       qword ptr [edx + 8], xmm0
-    movdqa     xmm0, [eax + 16]      // pixels 16..23
-    movdqa     xmm1, [eax + esi + 16]
-    lea        eax, [eax + 32]
-    pavgb      xmm0, xmm1
-    pshufb     xmm0, xmm4
-    movdqa     xmm1, kMadd21
-    pmaddubsw  xmm0, xmm1
-    paddsw     xmm0, xmm7
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    sub        ecx, 24
-    movq       qword ptr [edx + 16], xmm0
-    lea        edx, [edx + 24]
-    jg         wloop
-
-    pop        esi
-    ret
-  }
-}
-
-// Note that movdqa+palign may be better than movdqu.
-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]    // src_ptr
-    mov        esi, [esp + 4 + 8]    // src_stride
-    mov        edx, [esp + 4 + 12]   // dst_ptr
-    mov        ecx, [esp + 4 + 16]   // dst_width
-    movdqa     xmm2, kShuf01
-    movdqa     xmm3, kShuf11
-    movdqa     xmm4, kShuf21
-    movdqa     xmm5, kMadd01
-    movdqa     xmm6, kMadd11
-    movdqa     xmm7, kRound34
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]           // pixels 0..7
-    movdqa     xmm1, [eax + esi]
-    pavgb      xmm1, xmm0
-    pavgb      xmm0, xmm1
-    pshufb     xmm0, xmm2
-    pmaddubsw  xmm0, xmm5
-    paddsw     xmm0, xmm7
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    movq       qword ptr [edx], xmm0
-    movdqu     xmm0, [eax + 8]       // pixels 8..15
-    movdqu     xmm1, [eax + esi + 8]
-    pavgb      xmm1, xmm0
-    pavgb      xmm0, xmm1
-    pshufb     xmm0, xmm3
-    pmaddubsw  xmm0, xmm6
-    paddsw     xmm0, xmm7
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    movq       qword ptr [edx + 8], xmm0
-    movdqa     xmm0, [eax + 16]      // pixels 16..23
-    movdqa     xmm1, [eax + esi + 16]
-    lea        eax, [eax + 32]
-    pavgb      xmm1, xmm0
-    pavgb      xmm0, xmm1
-    pshufb     xmm0, xmm4
-    movdqa     xmm1, kMadd21
-    pmaddubsw  xmm0, xmm1
-    paddsw     xmm0, xmm7
-    psrlw      xmm0, 2
-    packuswb   xmm0, xmm0
-    sub        ecx, 24
-    movq       qword ptr [edx + 16], xmm0
-    lea        edx, [edx+24]
-    jg         wloop
-
-    pop        esi
-    ret
-  }
-}
-
-// 3/8 point sampler
-
-// Scale 32 pixels to 12
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
-                          uint8* dst_ptr, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_ptr
-                                     // src_stride ignored
-    mov        edx, [esp + 12]       // dst_ptr
-    mov        ecx, [esp + 16]       // dst_width
-    movdqa     xmm4, kShuf38a
-    movdqa     xmm5, kShuf38b
-
-    align      4
-  xloop:
-    movdqa     xmm0, [eax]           // 16 pixels -> 0,1,2,3,4,5
-    movdqa     xmm1, [eax + 16]      // 16 pixels -> 6,7,8,9,10,11
-    lea        eax, [eax + 32]
-    pshufb     xmm0, xmm4
-    pshufb     xmm1, xmm5
-    paddusb    xmm0, xmm1
-
-    sub        ecx, 12
-    movq       qword ptr [edx], xmm0  // write 12 pixels
-    movhlps    xmm1, xmm0
-    movd       [edx + 8], xmm1
-    lea        edx, [edx + 12]
-    jg         xloop
-
-    ret
-  }
-}
-
-// Scale 16x3 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]    // src_ptr
-    mov        esi, [esp + 4 + 8]    // src_stride
-    mov        edx, [esp + 4 + 12]   // dst_ptr
-    mov        ecx, [esp + 4 + 16]   // dst_width
-    movdqa     xmm2, kShufAc
-    movdqa     xmm3, kShufAc3
-    movdqa     xmm4, kScaleAc33
-    pxor       xmm5, xmm5
-
-    align      4
-  xloop:
-    movdqa     xmm0, [eax]           // sum up 3 rows into xmm0/1
-    movdqa     xmm6, [eax + esi]
-    movhlps    xmm1, xmm0
-    movhlps    xmm7, xmm6
-    punpcklbw  xmm0, xmm5
-    punpcklbw  xmm1, xmm5
-    punpcklbw  xmm6, xmm5
-    punpcklbw  xmm7, xmm5
-    paddusw    xmm0, xmm6
-    paddusw    xmm1, xmm7
-    movdqa     xmm6, [eax + esi * 2]
-    lea        eax, [eax + 16]
-    movhlps    xmm7, xmm6
-    punpcklbw  xmm6, xmm5
-    punpcklbw  xmm7, xmm5
-    paddusw    xmm0, xmm6
-    paddusw    xmm1, xmm7
-
-    movdqa     xmm6, xmm0            // 8 pixels -> 0,1,2 of xmm6
-    psrldq     xmm0, 2
-    paddusw    xmm6, xmm0
-    psrldq     xmm0, 2
-    paddusw    xmm6, xmm0
-    pshufb     xmm6, xmm2
-
-    movdqa     xmm7, xmm1            // 8 pixels -> 3,4,5 of xmm6
-    psrldq     xmm1, 2
-    paddusw    xmm7, xmm1
-    psrldq     xmm1, 2
-    paddusw    xmm7, xmm1
-    pshufb     xmm7, xmm3
-    paddusw    xmm6, xmm7
-
-    pmulhuw    xmm6, xmm4            // divide by 9,9,6, 9,9,6
-    packuswb   xmm6, xmm6
-
-    sub        ecx, 6
-    movd       [edx], xmm6           // write 6 pixels
-    psrlq      xmm6, 16
-    movd       [edx + 2], xmm6
-    lea        edx, [edx + 6]
-    jg         xloop
-
-    pop        esi
-    ret
-  }
-}
-
-// Scale 16x2 pixels to 6x1 with interpolation
-__declspec(naked) __declspec(align(16))
-void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
-                                ptrdiff_t src_stride,
-                                uint8* dst_ptr, int dst_width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]    // src_ptr
-    mov        esi, [esp + 4 + 8]    // src_stride
-    mov        edx, [esp + 4 + 12]   // dst_ptr
-    mov        ecx, [esp + 4 + 16]   // dst_width
-    movdqa     xmm2, kShufAb0
-    movdqa     xmm3, kShufAb1
-    movdqa     xmm4, kShufAb2
-    movdqa     xmm5, kScaleAb2
-
-    align      4
-  xloop:
-    movdqa     xmm0, [eax]           // average 2 rows into xmm0
-    pavgb      xmm0, [eax + esi]
-    lea        eax, [eax + 16]
-
-    movdqa     xmm1, xmm0            // 16 pixels -> 0,1,2,3,4,5 of xmm1
-    pshufb     xmm1, xmm2
-    movdqa     xmm6, xmm0
-    pshufb     xmm6, xmm3
-    paddusw    xmm1, xmm6
-    pshufb     xmm0, xmm4
-    paddusw    xmm1, xmm0
-
-    pmulhuw    xmm1, xmm5            // divide by 3,3,2, 3,3,2
-    packuswb   xmm1, xmm1
-
-    sub        ecx, 6
-    movd       [edx], xmm1           // write 6 pixels
-    psrlq      xmm1, 16
-    movd       [edx + 2], xmm1
-    lea        edx, [edx + 6]
-    jg         xloop
-
-    pop        esi
-    ret
-  }
-}
-
-// Reads 16xN bytes and produces 16 shorts at a time.
-// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.
-__declspec(naked) __declspec(align(16))
-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                       uint16* dst_ptr, int src_width,
-                       int src_height) {
-  __asm {
-    push       esi
-    push       edi
-    push       ebx
-    push       ebp
-    mov        esi, [esp + 16 + 4]   // src_ptr
-    mov        edx, [esp + 16 + 8]   // src_stride
-    mov        edi, [esp + 16 + 12]  // dst_ptr
-    mov        ecx, [esp + 16 + 16]  // dst_width
-    mov        ebx, [esp + 16 + 20]  // height
-    pxor       xmm4, xmm4
-    dec        ebx
-
-    align      4
-  xloop:
-    // first row
-    movdqa     xmm0, [esi]
-    lea        eax, [esi + edx]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm4
-    punpckhbw  xmm1, xmm4
-    lea        esi, [esi + 16]
-    mov        ebp, ebx
-    test       ebp, ebp
-    je         ydone
-
-    // sum remaining rows
-    align      4
-  yloop:
-    movdqa     xmm2, [eax]       // read 16 pixels
-    lea        eax, [eax + edx]  // advance to next row
-    movdqa     xmm3, xmm2
-    punpcklbw  xmm2, xmm4
-    punpckhbw  xmm3, xmm4
-    paddusw    xmm0, xmm2        // sum 16 words
-    paddusw    xmm1, xmm3
-    sub        ebp, 1
-    jg         yloop
-
-    align      4
-  ydone:
-    movdqa     [edi], xmm0
-    movdqa     [edi + 16], xmm1
-    lea        edi, [edi + 32]
-
-    sub        ecx, 16
-    jg         xloop
-
-    pop        ebp
-    pop        ebx
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// Bilinear column filtering. SSSE3 version.
-// TODO(fbarchard): Port to Neon
-// TODO(fbarchard): Switch the following:
-//    xor        ebx, ebx
-//    mov        bx, word ptr [esi + eax]  // 2 source x0 pixels
-// To
-//    movzx      ebx, word ptr [esi + eax]  // 2 source x0 pixels
-// when drmemory bug fixed.
-// https://code.google.com/p/drmemory/issues/detail?id=1396
-
-__declspec(naked) __declspec(align(16))
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
-                           int dst_width, int x, int dx) {
-  __asm {
-    push       ebx
-    push       esi
-    push       edi
-    mov        edi, [esp + 12 + 4]    // dst_ptr
-    mov        esi, [esp + 12 + 8]    // src_ptr
-    mov        ecx, [esp + 12 + 12]   // dst_width
-    movd       xmm2, [esp + 12 + 16]  // x
-    movd       xmm3, [esp + 12 + 20]  // dx
-    mov        eax, 0x04040000      // shuffle to line up fractions with pixel.
-    movd       xmm5, eax
-    pcmpeqb    xmm6, xmm6           // generate 0x007f for inverting fraction.
-    psrlw      xmm6, 9
-    pextrw     eax, xmm2, 1         // get x0 integer. preroll
-    sub        ecx, 2
-    jl         xloop29
-
-    movdqa     xmm0, xmm2           // x1 = x0 + dx
-    paddd      xmm0, xmm3
-    punpckldq  xmm2, xmm0           // x0 x1
-    punpckldq  xmm3, xmm3           // dx dx
-    paddd      xmm3, xmm3           // dx * 2, dx * 2
-    pextrw     edx, xmm2, 3         // get x1 integer. preroll
-
-    // 2 Pixel loop.
-    align      4
-  xloop2:
-    movdqa     xmm1, xmm2           // x0, x1 fractions.
-    paddd      xmm2, xmm3           // x += dx
-    movzx      ebx, word ptr [esi + eax]  // 2 source x0 pixels
-    movd       xmm0, ebx
-    psrlw      xmm1, 9              // 7 bit fractions.
-    movzx      ebx, word ptr [esi + edx]  // 2 source x1 pixels
-    movd       xmm4, ebx
-    pshufb     xmm1, xmm5           // 0011
-    punpcklwd  xmm0, xmm4
-    pxor       xmm1, xmm6           // 0..7f and 7f..0
-    pmaddubsw  xmm0, xmm1           // 16 bit, 2 pixels.
-    pextrw     eax, xmm2, 1         // get x0 integer. next iteration.
-    pextrw     edx, xmm2, 3         // get x1 integer. next iteration.
-    psrlw      xmm0, 7              // 8.7 fixed point to low 8 bits.
-    packuswb   xmm0, xmm0           // 8 bits, 2 pixels.
-    movd       ebx, xmm0
-    mov        [edi], bx
-    lea        edi, [edi + 2]
-    sub        ecx, 2               // 2 pixels
-    jge        xloop2
-
-    align      4
- xloop29:
-
-    add        ecx, 2 - 1
-    jl         xloop99
-
-    // 1 pixel remainder
-    movzx      ebx, word ptr [esi + eax]  // 2 source x0 pixels
-    movd       xmm0, ebx
-    psrlw      xmm2, 9              // 7 bit fractions.
-    pshufb     xmm2, xmm5           // 0011
-    pxor       xmm2, xmm6           // 0..7f and 7f..0
-    pmaddubsw  xmm0, xmm2           // 16 bit
-    psrlw      xmm0, 7              // 8.7 fixed point to low 8 bits.
-    packuswb   xmm0, xmm0           // 8 bits
-    movd       ebx, xmm0
-    mov        [edi], bl
-
-    align      4
- xloop99:
-
-    pop        edi
-    pop        esi
-    pop        ebx
-    ret
-  }
-}
-
-// Reads 16 pixels, duplicates them and writes 32 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
-                       int dst_width, int x, int dx) {
-  __asm {
-    mov        edx, [esp + 4]    // dst_ptr
-    mov        eax, [esp + 8]    // src_ptr
-    mov        ecx, [esp + 12]   // dst_width
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    lea        eax,  [eax + 16]
-    movdqa     xmm1, xmm0
-    punpcklbw  xmm0, xmm0
-    punpckhbw  xmm1, xmm1
-    sub        ecx, 32
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
-                            ptrdiff_t src_stride,
-                            uint8* dst_argb, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_argb
-                                     // src_stride ignored
-    mov        edx, [esp + 12]       // dst_argb
-    mov        ecx, [esp + 16]       // dst_width
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    shufps     xmm0, xmm1, 0xdd
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 8x1 rectangle to 4x1.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride,
-                                  uint8* dst_argb, int dst_width) {
-  __asm {
-    mov        eax, [esp + 4]        // src_argb
-                                     // src_stride ignored
-    mov        edx, [esp + 12]       // dst_argb
-    mov        ecx, [esp + 16]       // dst_width
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    lea        eax,  [eax + 32]
-    movdqa     xmm2, xmm0
-    shufps     xmm0, xmm1, 0x88      // even pixels
-    shufps     xmm2, xmm1, 0xdd      // odd pixels
-    pavgb      xmm0, xmm2
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Blends 8x2 rectangle to 4x1.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
-                               ptrdiff_t src_stride,
-                               uint8* dst_argb, int dst_width) {
-  __asm {
-    push       esi
-    mov        eax, [esp + 4 + 4]    // src_argb
-    mov        esi, [esp + 4 + 8]    // src_stride
-    mov        edx, [esp + 4 + 12]   // dst_argb
-    mov        ecx, [esp + 4 + 16]   // dst_width
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    movdqa     xmm1, [eax + 16]
-    movdqa     xmm2, [eax + esi]
-    movdqa     xmm3, [eax + esi + 16]
-    lea        eax,  [eax + 32]
-    pavgb      xmm0, xmm2            // average rows
-    pavgb      xmm1, xmm3
-    movdqa     xmm2, xmm0            // average columns (8 to 4 pixels)
-    shufps     xmm0, xmm1, 0x88      // even pixels
-    shufps     xmm2, xmm1, 0xdd      // odd pixels
-    pavgb      xmm0, xmm2
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    pop        esi
-    ret
-  }
-}
-
-// Reads 4 pixels at a time.
-// Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
-                               int src_stepx,
-                               uint8* dst_argb, int dst_width) {
-  __asm {
-    push       ebx
-    push       edi
-    mov        eax, [esp + 8 + 4]    // src_argb
-                                     // src_stride ignored
-    mov        ebx, [esp + 8 + 12]   // src_stepx
-    mov        edx, [esp + 8 + 16]   // dst_argb
-    mov        ecx, [esp + 8 + 20]   // dst_width
-    lea        ebx, [ebx * 4]
-    lea        edi, [ebx + ebx * 2]
-
-    align      4
-  wloop:
-    movd       xmm0, [eax]
-    movd       xmm1, [eax + ebx]
-    punpckldq  xmm0, xmm1
-    movd       xmm2, [eax + ebx * 2]
-    movd       xmm3, [eax + edi]
-    lea        eax,  [eax + ebx * 4]
-    punpckldq  xmm2, xmm3
-    punpcklqdq xmm0, xmm2
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    pop        edi
-    pop        ebx
-    ret
-  }
-}
-
-// Blends four 2x2 to 4x1.
-// Alignment requirement: dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
-                                  ptrdiff_t src_stride,
-                                  int src_stepx,
-                                  uint8* dst_argb, int dst_width) {
-  __asm {
-    push       ebx
-    push       esi
-    push       edi
-    mov        eax, [esp + 12 + 4]    // src_argb
-    mov        esi, [esp + 12 + 8]    // src_stride
-    mov        ebx, [esp + 12 + 12]   // src_stepx
-    mov        edx, [esp + 12 + 16]   // dst_argb
-    mov        ecx, [esp + 12 + 20]   // dst_width
-    lea        esi, [eax + esi]       // row1 pointer
-    lea        ebx, [ebx * 4]
-    lea        edi, [ebx + ebx * 2]
-
-    align      4
-  wloop:
-    movq       xmm0, qword ptr [eax]  // row0 4 pairs
-    movhps     xmm0, qword ptr [eax + ebx]
-    movq       xmm1, qword ptr [eax + ebx * 2]
-    movhps     xmm1, qword ptr [eax + edi]
-    lea        eax,  [eax + ebx * 4]
-    movq       xmm2, qword ptr [esi]  // row1 4 pairs
-    movhps     xmm2, qword ptr [esi + ebx]
-    movq       xmm3, qword ptr [esi + ebx * 2]
-    movhps     xmm3, qword ptr [esi + edi]
-    lea        esi,  [esi + ebx * 4]
-    pavgb      xmm0, xmm2            // average rows
-    pavgb      xmm1, xmm3
-    movdqa     xmm2, xmm0            // average columns (8 to 4 pixels)
-    shufps     xmm0, xmm1, 0x88      // even pixels
-    shufps     xmm2, xmm1, 0xdd      // odd pixels
-    pavgb      xmm0, xmm2
-    sub        ecx, 4
-    movdqa     [edx], xmm0
-    lea        edx, [edx + 16]
-    jg         wloop
-
-    pop        edi
-    pop        esi
-    pop        ebx
-    ret
-  }
-}
-
-// Column scaling unfiltered. SSE2 version.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
-                        int dst_width, int x, int dx) {
-  __asm {
-    push       edi
-    push       esi
-    mov        edi, [esp + 8 + 4]    // dst_argb
-    mov        esi, [esp + 8 + 8]    // src_argb
-    mov        ecx, [esp + 8 + 12]   // dst_width
-    movd       xmm2, [esp + 8 + 16]  // x
-    movd       xmm3, [esp + 8 + 20]  // dx
-
-    pshufd     xmm2, xmm2, 0         // x0 x0 x0 x0
-    pshufd     xmm0, xmm3, 0x11      // dx  0 dx  0
-    paddd      xmm2, xmm0
-    paddd      xmm3, xmm3            // 0, 0, 0,  dx * 2
-    pshufd     xmm0, xmm3, 0x05      // dx * 2, dx * 2, 0, 0
-    paddd      xmm2, xmm0            // x3 x2 x1 x0
-    paddd      xmm3, xmm3            // 0, 0, 0,  dx * 4
-    pshufd     xmm3, xmm3, 0         // dx * 4, dx * 4, dx * 4, dx * 4
-
-    pextrw     eax, xmm2, 1          // get x0 integer.
-    pextrw     edx, xmm2, 3          // get x1 integer.
-
-    cmp        ecx, 0
-    jle        xloop99
-    sub        ecx, 4
-    jl         xloop49
-
-    // 4 Pixel loop.
-    align      4
- xloop4:
-    movd       xmm0, [esi + eax * 4]  // 1 source x0 pixels
-    movd       xmm1, [esi + edx * 4]  // 1 source x1 pixels
-    pextrw     eax, xmm2, 5           // get x2 integer.
-    pextrw     edx, xmm2, 7           // get x3 integer.
-    paddd      xmm2, xmm3             // x += dx
-    punpckldq  xmm0, xmm1             // x0 x1
-
-    movd       xmm1, [esi + eax * 4]  // 1 source x2 pixels
-    movd       xmm4, [esi + edx * 4]  // 1 source x3 pixels
-    pextrw     eax, xmm2, 1           // get x0 integer. next iteration.
-    pextrw     edx, xmm2, 3           // get x1 integer. next iteration.
-    punpckldq  xmm1, xmm4             // x2 x3
-    punpcklqdq xmm0, xmm1             // x0 x1 x2 x3
-    sub        ecx, 4                 // 4 pixels
-    movdqu     [edi], xmm0
-    lea        edi, [edi + 16]
-    jge        xloop4
-
-    align      4
- xloop49:
-    test       ecx, 2
-    je         xloop29
-
-    // 2 Pixels.
-    movd       xmm0, [esi + eax * 4]  // 1 source x0 pixels
-    movd       xmm1, [esi + edx * 4]  // 1 source x1 pixels
-    pextrw     eax, xmm2, 5           // get x2 integer.
-    punpckldq  xmm0, xmm1             // x0 x1
-
-    movq       qword ptr [edi], xmm0
-    lea        edi, [edi + 8]
-
- xloop29:
-    test       ecx, 1
-    je         xloop99
-
-    // 1 Pixels.
-    movd       xmm0, [esi + eax * 4]  // 1 source x2 pixels
-    movd       dword ptr [edi], xmm0
-    align      4
- xloop99:
-
-    pop        esi
-    pop        edi
-    ret
-  }
-}
-
-// Bilinear row filtering combines 2x1 -> 1x1. SSSE3 version.
-// TODO(fbarchard): Port to Neon
-
-// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
-static uvec8 kShuffleColARGB = {
-  0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u,  // bbggrraa 1st pixel
-  8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u  // bbggrraa 2nd pixel
-};
-
-// Shuffle table for duplicating 2 fractions into 8 bytes each
-static uvec8 kShuffleFractions = {
-  0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
-};
-
-__declspec(naked) __declspec(align(16))
-void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
-                               int dst_width, int x, int dx) {
-  __asm {
-    push       esi
-    push       edi
-    mov        edi, [esp + 8 + 4]    // dst_argb
-    mov        esi, [esp + 8 + 8]    // src_argb
-    mov        ecx, [esp + 8 + 12]   // dst_width
-    movd       xmm2, [esp + 8 + 16]  // x
-    movd       xmm3, [esp + 8 + 20]  // dx
-    movdqa     xmm4, kShuffleColARGB
-    movdqa     xmm5, kShuffleFractions
-    pcmpeqb    xmm6, xmm6           // generate 0x007f for inverting fraction.
-    psrlw      xmm6, 9
-    pextrw     eax, xmm2, 1         // get x0 integer. preroll
-    sub        ecx, 2
-    jl         xloop29
-
-    movdqa     xmm0, xmm2           // x1 = x0 + dx
-    paddd      xmm0, xmm3
-    punpckldq  xmm2, xmm0           // x0 x1
-    punpckldq  xmm3, xmm3           // dx dx
-    paddd      xmm3, xmm3           // dx * 2, dx * 2
-    pextrw     edx, xmm2, 3         // get x1 integer. preroll
-
-    // 2 Pixel loop.
-    align      4
-  xloop2:
-    movdqa     xmm1, xmm2           // x0, x1 fractions.
-    paddd      xmm2, xmm3           // x += dx
-    movq       xmm0, qword ptr [esi + eax * 4]  // 2 source x0 pixels
-    psrlw      xmm1, 9              // 7 bit fractions.
-    movhps     xmm0, qword ptr [esi + edx * 4]  // 2 source x1 pixels
-    pshufb     xmm1, xmm5           // 0000000011111111
-    pshufb     xmm0, xmm4           // arrange pixels into pairs
-    pxor       xmm1, xmm6           // 0..7f and 7f..0
-    pmaddubsw  xmm0, xmm1           // argb_argb 16 bit, 2 pixels.
-    pextrw     eax, xmm2, 1         // get x0 integer. next iteration.
-    pextrw     edx, xmm2, 3         // get x1 integer. next iteration.
-    psrlw      xmm0, 7              // argb 8.7 fixed point to low 8 bits.
-    packuswb   xmm0, xmm0           // argb_argb 8 bits, 2 pixels.
-    movq       qword ptr [edi], xmm0
-    lea        edi, [edi + 8]
-    sub        ecx, 2               // 2 pixels
-    jge        xloop2
-
-    align      4
- xloop29:
-
-    add        ecx, 2 - 1
-    jl         xloop99
-
-    // 1 pixel remainder
-    psrlw      xmm2, 9              // 7 bit fractions.
-    movq       xmm0, qword ptr [esi + eax * 4]  // 2 source x0 pixels
-    pshufb     xmm2, xmm5           // 00000000
-    pshufb     xmm0, xmm4           // arrange pixels into pairs
-    pxor       xmm2, xmm6           // 0..7f and 7f..0
-    pmaddubsw  xmm0, xmm2           // argb 16 bit, 1 pixel.
-    psrlw      xmm0, 7
-    packuswb   xmm0, xmm0           // argb 8 bits, 1 pixel.
-    movd       [edi], xmm0
-
-    align      4
- xloop99:
-
-    pop        edi
-    pop        esi
-    ret
-  }
-}
-
-// Reads 4 pixels, duplicates them and writes 8 pixels.
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
-__declspec(naked) __declspec(align(16))
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
-                           int dst_width, int x, int dx) {
-  __asm {
-    mov        edx, [esp + 4]    // dst_argb
-    mov        eax, [esp + 8]    // src_argb
-    mov        ecx, [esp + 12]   // dst_width
-
-    align      4
-  wloop:
-    movdqa     xmm0, [eax]
-    lea        eax,  [eax + 16]
-    movdqa     xmm1, xmm0
-    punpckldq  xmm0, xmm0
-    punpckhdq  xmm1, xmm1
-    sub        ecx, 8
-    movdqa     [edx], xmm0
-    movdqa     [edx + 16], xmm1
-    lea        edx, [edx + 32]
-    jg         wloop
-
-    ret
-  }
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-__declspec(naked) __declspec(align(16))
-int FixedDiv_X86(int num, int div) {
-  __asm {
-    mov        eax, [esp + 4]    // num
-    cdq                          // extend num to 64 bits
-    shld       edx, eax, 16      // 32.16
-    shl        eax, 16
-    idiv       dword ptr [esp + 8]
-    ret
-  }
-}
-
-// Divide num by div and return as 16.16 fixed point result.
-__declspec(naked) __declspec(align(16))
-int FixedDiv1_X86(int num, int div) {
-  __asm {
-    mov        eax, [esp + 4]    // num
-    mov        ecx, [esp + 8]    // denom
-    cdq                          // extend num to 64 bits
-    shld       edx, eax, 16      // 32.16
-    shl        eax, 16
-    sub        eax, 0x00010001
-    sbb        edx, 0
-    sub        ecx, 1
-    idiv       ecx
-    ret
-  }
-}
-
-#endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc
deleted file mode 100755
index efbedf46e2..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "libyuv/video_common.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
-
-struct FourCCAliasEntry {
-  uint32 alias;
-  uint32 canonical;
-};
-
-static const struct FourCCAliasEntry kFourCCAliases[] = {
-  {FOURCC_IYUV, FOURCC_I420},
-  {FOURCC_YU16, FOURCC_I422},
-  {FOURCC_YU24, FOURCC_I444},
-  {FOURCC_YUYV, FOURCC_YUY2},
-  {FOURCC_YUVS, FOURCC_YUY2},  // kCMPixelFormat_422YpCbCr8_yuvs
-  {FOURCC_HDYC, FOURCC_UYVY},
-  {FOURCC_2VUY, FOURCC_UYVY},  // kCMPixelFormat_422YpCbCr8
-  {FOURCC_JPEG, FOURCC_MJPG},  // Note: JPEG has DHT while MJPG does not.
-  {FOURCC_DMB1, FOURCC_MJPG},
-  {FOURCC_BA81, FOURCC_BGGR},
-  {FOURCC_RGB3, FOURCC_RAW },
-  {FOURCC_BGR3, FOURCC_24BG},
-  {FOURCC_CM32, FOURCC_BGRA},  // kCMPixelFormat_32ARGB
-  {FOURCC_CM24, FOURCC_RAW },  // kCMPixelFormat_24RGB
-  {FOURCC_L555, FOURCC_RGBO},  // kCMPixelFormat_16LE555
-  {FOURCC_L565, FOURCC_RGBP},  // kCMPixelFormat_16LE565
-  {FOURCC_5551, FOURCC_RGBO},  // kCMPixelFormat_16LE5551
-};
-// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
-//  {FOURCC_BGRA, FOURCC_ARGB},  // kCMPixelFormat_32BGRA
-
-LIBYUV_API
-uint32 CanonicalFourCC(uint32 fourcc) {
-  int i;
-  for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
-    if (kFourCCAliases[i].alias == fourcc) {
-      return kFourCCAliases[i].canonical;
-    }
-  }
-  // Not an alias, so return it as-is.
-  return fourcc;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
-
diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm b/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm
deleted file mode 100755
index cb5c32df3a..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm
+++ /dev/null
@@ -1,1136 +0,0 @@
-;*****************************************************************************
-;* x86inc.asm: x264asm abstraction layer
-;*****************************************************************************
-;* Copyright (C) 2005-2012 x264 project
-;*
-;* Authors: Loren Merritt <lorenm@u.washington.edu>
-;*          Anton Mitrofanov <BugMaster@narod.ru>
-;*          Jason Garrett-Glaser <darkshikari@gmail.com>
-;*          Henrik Gramner <hengar-6@student.ltu.se>
-;*
-;* Permission to use, copy, modify, and/or distribute this software for any
-;* purpose with or without fee is hereby granted, provided that the above
-;* copyright notice and this permission notice appear in all copies.
-;*
-;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-;*****************************************************************************
-
-; This is a header file for the x264ASM assembly language, which uses
-; NASM/YASM syntax combined with a large number of macros to provide easy
-; abstraction between different calling conventions (x86_32, win64, linux64).
-; It also has various other useful features to simplify writing the kind of
-; DSP functions that are most often used in x264.
-
-; Unlike the rest of x264, this file is available under an ISC license, as it
-; has significant usefulness outside of x264 and we want it to be available
-; to the largest audience possible.  Of course, if you modify it for your own
-; purposes to add a new feature, we strongly encourage contributing a patch
-; as this feature might be useful for others as well.  Send patches or ideas
-; to x264-devel@videolan.org .
-
-; Local changes for libyuv:
-; remove %define program_name and references in labels
-; rename cpus to uppercase
-
-%define WIN64  0
-%define UNIX64 0
-%if ARCH_X86_64
-    %ifidn __OUTPUT_FORMAT__,win32
-        %define WIN64  1
-    %elifidn __OUTPUT_FORMAT__,win64
-        %define WIN64  1
-    %else
-        %define UNIX64 1
-    %endif
-%endif
-
-%ifdef PREFIX
-    %define mangle(x) _ %+ x
-%else
-    %define mangle(x) x
-%endif
-
-; Name of the .rodata section.
-; Kludge: Something on OS X fails to align .rodata even given an align attribute,
-; so use a different read-only section.
-%macro SECTION_RODATA 0-1 16
-    %ifidn __OUTPUT_FORMAT__,macho64
-        SECTION .text align=%1
-    %elifidn __OUTPUT_FORMAT__,macho
-        SECTION .text align=%1
-        fakegot:
-    %elifidn __OUTPUT_FORMAT__,aout
-        section .text
-    %else
-        SECTION .rodata align=%1
-    %endif
-%endmacro
-
-; aout does not support align=
-%macro SECTION_TEXT 0-1 16
-    %ifidn __OUTPUT_FORMAT__,aout
-        SECTION .text
-    %else
-        SECTION .text align=%1
-    %endif
-%endmacro
-
-%if WIN64
-    %define PIC
-%elif ARCH_X86_64 == 0
-; x86_32 doesn't require PIC.
-; Some distros prefer shared objects to be PIC, but nothing breaks if
-; the code contains a few textrels, so we'll skip that complexity.
-    %undef PIC
-%endif
-%ifdef PIC
-    default rel
-%endif
-
-; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPU amdnop
-
-; Macros to eliminate most code duplication between x86_32 and x86_64:
-; Currently this works only for leaf functions which load all their arguments
-; into registers at the start, and make no other use of the stack. Luckily that
-; covers most of x264's asm.
-
-; PROLOGUE:
-; %1 = number of arguments. loads them from stack if needed.
-; %2 = number of registers used. pushes callee-saved regs if needed.
-; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = list of names to define to registers
-; PROLOGUE can also be invoked by adding the same options to cglobal
-
-; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
-
-; TODO Some functions can use some args directly from the stack. If they're the
-; last args then you can just not declare them, but if they're in the middle
-; we need more flexible macro.
-
-; RET:
-; Pops anything that was pushed by PROLOGUE, and returns.
-
-; REP_RET:
-; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
-; which are slow when a normal ret follows a branch.
-
-; registers:
-; rN and rNq are the native-size register holding function argument N
-; rNd, rNw, rNb are dword, word, and byte size
-; rNh is the high 8 bits of the word size
-; rNm is the original location of arg N (a register or on the stack), dword
-; rNmp is native size
-
-%macro DECLARE_REG 2-3
-    %define r%1q %2
-    %define r%1d %2d
-    %define r%1w %2w
-    %define r%1b %2b
-    %define r%1h %2h
-    %if %0 == 2
-        %define r%1m  %2d
-        %define r%1mp %2
-    %elif ARCH_X86_64 ; memory
-        %define r%1m [rsp + stack_offset + %3]
-        %define r%1mp qword r %+ %1m
-    %else
-        %define r%1m [esp + stack_offset + %3]
-        %define r%1mp dword r %+ %1m
-    %endif
-    %define r%1  %2
-%endmacro
-
-%macro DECLARE_REG_SIZE 3
-    %define r%1q r%1
-    %define e%1q r%1
-    %define r%1d e%1
-    %define e%1d e%1
-    %define r%1w %1
-    %define e%1w %1
-    %define r%1h %3
-    %define e%1h %3
-    %define r%1b %2
-    %define e%1b %2
-%if ARCH_X86_64 == 0
-    %define r%1  e%1
-%endif
-%endmacro
-
-DECLARE_REG_SIZE ax, al, ah
-DECLARE_REG_SIZE bx, bl, bh
-DECLARE_REG_SIZE cx, cl, ch
-DECLARE_REG_SIZE dx, dl, dh
-DECLARE_REG_SIZE si, sil, null
-DECLARE_REG_SIZE di, dil, null
-DECLARE_REG_SIZE bp, bpl, null
-
-; t# defines for when per-arch register allocation is more complex than just function arguments
-
-%macro DECLARE_REG_TMP 1-*
-    %assign %%i 0
-    %rep %0
-        CAT_XDEFINE t, %%i, r%1
-        %assign %%i %%i+1
-        %rotate 1
-    %endrep
-%endmacro
-
-%macro DECLARE_REG_TMP_SIZE 0-*
-    %rep %0
-        %define t%1q t%1 %+ q
-        %define t%1d t%1 %+ d
-        %define t%1w t%1 %+ w
-        %define t%1h t%1 %+ h
-        %define t%1b t%1 %+ b
-        %rotate 1
-    %endrep
-%endmacro
-
-DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-
-%if ARCH_X86_64
-    %define gprsize 8
-%else
-    %define gprsize 4
-%endif
-
-%macro PUSH 1
-    push %1
-    %assign stack_offset stack_offset+gprsize
-%endmacro
-
-%macro POP 1
-    pop %1
-    %assign stack_offset stack_offset-gprsize
-%endmacro
-
-%macro PUSH_IF_USED 1-*
-    %rep %0
-        %if %1 < regs_used
-            PUSH r%1
-        %endif
-        %rotate 1
-    %endrep
-%endmacro
-
-%macro POP_IF_USED 1-*
-    %rep %0
-        %if %1 < regs_used
-            pop r%1
-        %endif
-        %rotate 1
-    %endrep
-%endmacro
-
-%macro LOAD_IF_USED 1-*
-    %rep %0
-        %if %1 < num_args
-            mov r%1, r %+ %1 %+ mp
-        %endif
-        %rotate 1
-    %endrep
-%endmacro
-
-%macro SUB 2
-    sub %1, %2
-    %ifidn %1, rsp
-        %assign stack_offset stack_offset+(%2)
-    %endif
-%endmacro
-
-%macro ADD 2
-    add %1, %2
-    %ifidn %1, rsp
-        %assign stack_offset stack_offset-(%2)
-    %endif
-%endmacro
-
-%macro movifnidn 2
-    %ifnidn %1, %2
-        mov %1, %2
-    %endif
-%endmacro
-
-%macro movsxdifnidn 2
-    %ifnidn %1, %2
-        movsxd %1, %2
-    %endif
-%endmacro
-
-%macro ASSERT 1
-    %if (%1) == 0
-        %error assert failed
-    %endif
-%endmacro
-
-%macro DEFINE_ARGS 0-*
-    %ifdef n_arg_names
-        %assign %%i 0
-        %rep n_arg_names
-            CAT_UNDEF arg_name %+ %%i, q
-            CAT_UNDEF arg_name %+ %%i, d
-            CAT_UNDEF arg_name %+ %%i, w
-            CAT_UNDEF arg_name %+ %%i, h
-            CAT_UNDEF arg_name %+ %%i, b
-            CAT_UNDEF arg_name %+ %%i, m
-            CAT_UNDEF arg_name %+ %%i, mp
-            CAT_UNDEF arg_name, %%i
-            %assign %%i %%i+1
-        %endrep
-    %endif
-
-    %xdefine %%stack_offset stack_offset
-    %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
-    %assign %%i 0
-    %rep %0
-        %xdefine %1q r %+ %%i %+ q
-        %xdefine %1d r %+ %%i %+ d
-        %xdefine %1w r %+ %%i %+ w
-        %xdefine %1h r %+ %%i %+ h
-        %xdefine %1b r %+ %%i %+ b
-        %xdefine %1m r %+ %%i %+ m
-        %xdefine %1mp r %+ %%i %+ mp
-        CAT_XDEFINE arg_name, %%i, %1
-        %assign %%i %%i+1
-        %rotate 1
-    %endrep
-    %xdefine stack_offset %%stack_offset
-    %assign n_arg_names %0
-%endmacro
-
-%if WIN64 ; Windows x64 ;=================================================
-
-DECLARE_REG 0,  rcx
-DECLARE_REG 1,  rdx
-DECLARE_REG 2,  R8
-DECLARE_REG 3,  R9
-DECLARE_REG 4,  R10, 40
-DECLARE_REG 5,  R11, 48
-DECLARE_REG 6,  rax, 56
-DECLARE_REG 7,  rdi, 64
-DECLARE_REG 8,  rsi, 72
-DECLARE_REG 9,  rbx, 80
-DECLARE_REG 10, rbp, 88
-DECLARE_REG 11, R12, 96
-DECLARE_REG 12, R13, 104
-DECLARE_REG 13, R14, 112
-DECLARE_REG 14, R15, 120
-
-%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
-    %assign num_args %1
-    %assign regs_used %2
-    ASSERT regs_used >= num_args
-    ASSERT regs_used <= 15
-    PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
-    %if mmsize == 8
-        %assign xmm_regs_used 0
-    %else
-        WIN64_SPILL_XMM %3
-    %endif
-    LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
-    DEFINE_ARGS %4
-%endmacro
-
-%macro WIN64_SPILL_XMM 1
-    %assign xmm_regs_used %1
-    ASSERT xmm_regs_used <= 16
-    %if xmm_regs_used > 6
-        SUB rsp, (xmm_regs_used-6)*16+16
-        %assign %%i xmm_regs_used
-        %rep (xmm_regs_used-6)
-            %assign %%i %%i-1
-            movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
-        %endrep
-    %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM_INTERNAL 1
-    %if xmm_regs_used > 6
-        %assign %%i xmm_regs_used
-        %rep (xmm_regs_used-6)
-            %assign %%i %%i-1
-            movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
-        %endrep
-        add %1, (xmm_regs_used-6)*16+16
-    %endif
-%endmacro
-
-%macro WIN64_RESTORE_XMM 1
-    WIN64_RESTORE_XMM_INTERNAL %1
-    %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16
-    %assign xmm_regs_used 0
-%endmacro
-
-%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
-
-%macro RET 0
-    WIN64_RESTORE_XMM_INTERNAL rsp
-    POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-%if mmsize == 32
-    vzeroupper
-%endif
-    ret
-%endmacro
-
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
-
-DECLARE_REG 0,  rdi
-DECLARE_REG 1,  rsi
-DECLARE_REG 2,  rdx
-DECLARE_REG 3,  rcx
-DECLARE_REG 4,  R8
-DECLARE_REG 5,  R9
-DECLARE_REG 6,  rax, 8
-DECLARE_REG 7,  R10, 16
-DECLARE_REG 8,  R11, 24
-DECLARE_REG 9,  rbx, 32
-DECLARE_REG 10, rbp, 40
-DECLARE_REG 11, R12, 48
-DECLARE_REG 12, R13, 56
-DECLARE_REG 13, R14, 64
-DECLARE_REG 14, R15, 72
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
-    %assign num_args %1
-    %assign regs_used %2
-    ASSERT regs_used >= num_args
-    ASSERT regs_used <= 15
-    PUSH_IF_USED 9, 10, 11, 12, 13, 14
-    LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
-    DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 9 || mmsize == 32
-
-%macro RET 0
-    POP_IF_USED 14, 13, 12, 11, 10, 9
-%if mmsize == 32
-    vzeroupper
-%endif
-    ret
-%endmacro
-
-%else ; X86_32 ;==============================================================
-
-DECLARE_REG 0, eax, 4
-DECLARE_REG 1, ecx, 8
-DECLARE_REG 2, edx, 12
-DECLARE_REG 3, ebx, 16
-DECLARE_REG 4, esi, 20
-DECLARE_REG 5, edi, 24
-DECLARE_REG 6, ebp, 28
-%define rsp esp
-
-%macro DECLARE_ARG 1-*
-    %rep %0
-        %define r%1m [esp + stack_offset + 4*%1 + 4]
-        %define r%1mp dword r%1m
-        %rotate 1
-    %endrep
-%endmacro
-
-DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
-
-%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
-    %assign num_args %1
-    %assign regs_used %2
-    %if regs_used > 7
-        %assign regs_used 7
-    %endif
-    ASSERT regs_used >= num_args
-    PUSH_IF_USED 3, 4, 5, 6
-    LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
-    DEFINE_ARGS %4
-%endmacro
-
-%define has_epilogue regs_used > 3 || mmsize == 32
-
-%macro RET 0
-    POP_IF_USED 6, 5, 4, 3
-%if mmsize == 32
-    vzeroupper
-%endif
-    ret
-%endmacro
-
-%endif ;======================================================================
-
-%if WIN64 == 0
-%macro WIN64_SPILL_XMM 1
-%endmacro
-%macro WIN64_RESTORE_XMM 1
-%endmacro
-%endif
-
-%macro REP_RET 0
-    %if has_epilogue
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
-%macro TAIL_CALL 2 ; callee, is_nonadjacent
-    %if has_epilogue
-        call %1
-        RET
-    %elif %2
-        jmp %1
-    %endif
-%endmacro
-
-;=============================================================================
-; arch-independent part
-;=============================================================================
-
-%assign function_align 16
-
-; Begin a function.
-; Applies any symbol mangling needed for C linkage, and sets up a define such that
-; subsequent uses of the function name automatically refer to the mangled version.
-; Appends cpuflags to the function name if cpuflags has been specified.
-%macro cglobal 1-2+ ; name, [PROLOGUE args]
-%if %0 == 1
-    cglobal_internal %1 %+ SUFFIX
-%else
-    cglobal_internal %1 %+ SUFFIX, %2
-%endif
-%endmacro
-%macro cglobal_internal 1-2+
-    %ifndef cglobaled_%1
-        %xdefine %1 mangle(%1)
-        %xdefine %1.skip_prologue %1 %+ .skip_prologue
-        CAT_XDEFINE cglobaled_, %1, 1
-    %endif
-    %xdefine current_function %1
-    %ifidn __OUTPUT_FORMAT__,elf
-        global %1:function hidden
-    %else
-        global %1
-    %endif
-    align function_align
-    %1:
-    RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
-    %assign stack_offset 0
-    %if %0 > 1
-        PROLOGUE %2
-    %endif
-%endmacro
-
-%macro cextern 1
-    %xdefine %1 mangle(%1)
-    CAT_XDEFINE cglobaled_, %1, 1
-    extern %1
-%endmacro
-
-; like cextern, but without the prefix
-%macro cextern_naked 1
-    %xdefine %1 mangle(%1)
-    CAT_XDEFINE cglobaled_, %1, 1
-    extern %1
-%endmacro
-
-%macro const 2+
-    %xdefine %1 mangle(%1)
-    global %1
-    %1: %2
-%endmacro
-
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf32
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf64
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-
-; cpuflags
-
-%assign cpuflags_MMX      (1<<0)
-%assign cpuflags_MMX2     (1<<1) | cpuflags_MMX
-%assign cpuflags_3dnow    (1<<2) | cpuflags_MMX
-%assign cpuflags_3dnow2   (1<<3) | cpuflags_3dnow
-%assign cpuflags_SSE      (1<<4) | cpuflags_MMX2
-%assign cpuflags_SSE2     (1<<5) | cpuflags_SSE
-%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
-%assign cpuflags_SSE3     (1<<7) | cpuflags_SSE2
-%assign cpuflags_SSSE3    (1<<8) | cpuflags_SSE3
-%assign cpuflags_SSE4     (1<<9) | cpuflags_SSSE3
-%assign cpuflags_SSE42    (1<<10)| cpuflags_SSE4
-%assign cpuflags_AVX      (1<<11)| cpuflags_SSE42
-%assign cpuflags_xop      (1<<12)| cpuflags_AVX
-%assign cpuflags_fma4     (1<<13)| cpuflags_AVX
-%assign cpuflags_AVX2     (1<<14)| cpuflags_AVX
-%assign cpuflags_fma3     (1<<15)| cpuflags_AVX
-
-%assign cpuflags_cache32  (1<<16)
-%assign cpuflags_cache64  (1<<17)
-%assign cpuflags_slowctz  (1<<18)
-%assign cpuflags_lzcnt    (1<<19)
-%assign cpuflags_misalign (1<<20)
-%assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<22)
-%assign cpuflags_bmi1     (1<<23)
-%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
-%assign cpuflags_tbm      (1<<25)|cpuflags_bmi1
-
-%define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
-
-; Takes up to 2 cpuflags from the above list.
-; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
-; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
-%macro INIT_CPUFLAGS 0-2
-    %if %0 >= 1
-        %xdefine cpuname %1
-        %assign cpuflags cpuflags_%1
-        %if %0 >= 2
-            %xdefine cpuname %1_%2
-            %assign cpuflags cpuflags | cpuflags_%2
-        %endif
-        %xdefine SUFFIX _ %+ cpuname
-        %if cpuflag(AVX)
-            %assign AVX_enabled 1
-        %endif
-        %if mmsize == 16 && notcpuflag(SSE2)
-            %define mova movaps
-            %define movu movups
-            %define movnta movntps
-        %endif
-        %if cpuflag(aligned)
-            %define movu mova
-        %elifidn %1, SSE3
-            %define movu lddqu
-        %endif
-    %else
-        %xdefine SUFFIX
-        %undef cpuname
-        %undef cpuflags
-    %endif
-%endmacro
-
-; merge MMX and SSE*
-
-%macro CAT_XDEFINE 3
-    %xdefine %1%2 %3
-%endmacro
-
-%macro CAT_UNDEF 2
-    %undef %1%2
-%endmacro
-
-%macro INIT_MMX 0-1+
-    %assign AVX_enabled 0
-    %define RESET_MM_PERMUTATION INIT_MMX %1
-    %define mmsize 8
-    %define num_mmregs 8
-    %define mova movq
-    %define movu movq
-    %define movh movd
-    %define movnta movntq
-    %assign %%i 0
-    %rep 8
-    CAT_XDEFINE m, %%i, mm %+ %%i
-    CAT_XDEFINE nmm, %%i, %%i
-    %assign %%i %%i+1
-    %endrep
-    %rep 8
-    CAT_UNDEF m, %%i
-    CAT_UNDEF nmm, %%i
-    %assign %%i %%i+1
-    %endrep
-    INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_XMM 0-1+
-    %assign AVX_enabled 0
-    %define RESET_MM_PERMUTATION INIT_XMM %1
-    %define mmsize 16
-    %define num_mmregs 8
-    %if ARCH_X86_64
-    %define num_mmregs 16
-    %endif
-    %define mova movdqa
-    %define movu movdqu
-    %define movh movq
-    %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-    CAT_XDEFINE m, %%i, xmm %+ %%i
-    CAT_XDEFINE nxmm, %%i, %%i
-    %assign %%i %%i+1
-    %endrep
-    INIT_CPUFLAGS %1
-%endmacro
-
-%macro INIT_YMM 0-1+
-    %assign AVX_enabled 1
-    %define RESET_MM_PERMUTATION INIT_YMM %1
-    %define mmsize 32
-    %define num_mmregs 8
-    %if ARCH_X86_64
-    %define num_mmregs 16
-    %endif
-    %define mova vmovaps
-    %define movu vmovups
-    %undef movh
-    %define movnta vmovntps
-    %assign %%i 0
-    %rep num_mmregs
-    CAT_XDEFINE m, %%i, ymm %+ %%i
-    CAT_XDEFINE nymm, %%i, %%i
-    %assign %%i %%i+1
-    %endrep
-    INIT_CPUFLAGS %1
-%endmacro
-
-INIT_XMM
-
-; I often want to use macros that permute their arguments. e.g. there's no
-; efficient way to implement butterfly or transpose or dct without swapping some
-; arguments.
-;
-; I would like to not have to manually keep track of the permutations:
-; If I insert a permutation in the middle of a function, it should automatically
-; change everything that follows. For more complex macros I may also have multiple
-; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
-;
-; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
-; permutes its arguments. It's equivalent to exchanging the contents of the
-; registers, except that this way you exchange the register names instead, so it
-; doesn't cost any cycles.
-
-%macro PERMUTE 2-* ; takes a list of pairs to swap
-%rep %0/2
-    %xdefine tmp%2 m%2
-    %xdefine ntmp%2 nm%2
-    %rotate 2
-%endrep
-%rep %0/2
-    %xdefine m%1 tmp%2
-    %xdefine nm%1 ntmp%2
-    %undef tmp%2
-    %undef ntmp%2
-    %rotate 2
-%endrep
-%endmacro
-
-%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
-%rep %0-1
-%ifdef m%1
-    %xdefine tmp m%1
-    %xdefine m%1 m%2
-    %xdefine m%2 tmp
-    CAT_XDEFINE n, m%1, %1
-    CAT_XDEFINE n, m%2, %2
-%else
-    ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
-    ; Be careful using this mode in nested macros though, as in some cases there may be
-    ; other copies of m# that have already been dereferenced and don't get updated correctly.
-    %xdefine %%n1 n %+ %1
-    %xdefine %%n2 n %+ %2
-    %xdefine tmp m %+ %%n1
-    CAT_XDEFINE m, %%n1, m %+ %%n2
-    CAT_XDEFINE m, %%n2, tmp
-    CAT_XDEFINE n, m %+ %%n1, %%n1
-    CAT_XDEFINE n, m %+ %%n2, %%n2
-%endif
-    %undef tmp
-    %rotate 1
-%endrep
-%endmacro
-
-; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
-; calls to that function will automatically load the permutation, so values can
-; be returned in mmregs.
-%macro SAVE_MM_PERMUTATION 0-1
-    %if %0
-        %xdefine %%f %1_m
-    %else
-        %xdefine %%f current_function %+ _m
-    %endif
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE %%f, %%i, m %+ %%i
-    %assign %%i %%i+1
-    %endrep
-%endmacro
-
-%macro LOAD_MM_PERMUTATION 1 ; name to load from
-    %ifdef %1_m0
-        %assign %%i 0
-        %rep num_mmregs
-            CAT_XDEFINE m, %%i, %1_m %+ %%i
-            CAT_XDEFINE n, m %+ %%i, %%i
-        %assign %%i %%i+1
-        %endrep
-    %endif
-%endmacro
-
-; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
-%macro call 1
-    call_internal %1, %1 %+ SUFFIX
-%endmacro
-%macro call_internal 2
-    %xdefine %%i %1
-    %ifndef cglobaled_%1
-        %ifdef cglobaled_%2
-            %xdefine %%i %2
-        %endif
-    %endif
-    call %%i
-    LOAD_MM_PERMUTATION %%i
-%endmacro
-
-; Substitutions that reduce instruction size but are functionally equivalent
-%macro add 2
-    %ifnum %2
-        %if %2==128
-            sub %1, -128
-        %else
-            add %1, %2
-        %endif
-    %else
-        add %1, %2
-    %endif
-%endmacro
-
-%macro sub 2
-    %ifnum %2
-        %if %2==128
-            add %1, -128
-        %else
-            sub %1, %2
-        %endif
-    %else
-        sub %1, %2
-    %endif
-%endmacro
-
-;=============================================================================
-; AVX abstraction layer
-;=============================================================================
-
-%assign i 0
-%rep 16
-    %if i < 8
-        CAT_XDEFINE sizeofmm, i, 8
-    %endif
-    CAT_XDEFINE sizeofxmm, i, 16
-    CAT_XDEFINE sizeofymm, i, 32
-%assign i i+1
-%endrep
-%undef i
-
-%macro CHECK_AVX_INSTR_EMU 3-*
-    %xdefine %%opcode %1
-    %xdefine %%dst %2
-    %rep %0-2
-        %ifidn %%dst, %3
-            %error non-AVX emulation of ``%%opcode'' is not supported
-        %endif
-        %rotate 1
-    %endrep
-%endmacro
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == number of operands given
-;%5+: operands
-%macro RUN_AVX_INSTR 6-7+
-    %ifid %6
-        %define %%sizeofreg sizeof%6
-    %elifid %5
-        %define %%sizeofreg sizeof%5
-    %else
-        %define %%sizeofreg mmsize
-    %endif
-    %if %%sizeofreg==32
-        %if %4>=3
-            v%1 %5, %6, %7
-        %else
-            v%1 %5, %6
-        %endif
-    %else
-        %if %%sizeofreg==8
-            %define %%regmov movq
-        %elif %2
-            %define %%regmov movaps
-        %else
-            %define %%regmov movdqa
-        %endif
-
-        %if %4>=3+%3
-            %ifnidn %5, %6
-                %if AVX_enabled && %%sizeofreg==16
-                    v%1 %5, %6, %7
-                %else
-                    CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
-                    %%regmov %5, %6
-                    %1 %5, %7
-                %endif
-            %else
-                %1 %5, %7
-            %endif
-        %elif %4>=3
-            %1 %5, %6, %7
-        %else
-            %1 %5, %6
-        %endif
-    %endif
-%endmacro
-
-; 3arg AVX ops with a memory arg can only have it in src2,
-; whereas SSE emulation of 3arg prefers to have it in src1 (i.e. the mov).
-; So, if the op is symmetric and the wrong one is memory, swap them.
-%macro RUN_AVX_INSTR1 8
-    %assign %%swap 0
-    %if AVX_enabled
-        %ifnid %6
-            %assign %%swap 1
-        %endif
-    %elifnidn %5, %6
-        %ifnid %7
-            %assign %%swap 1
-        %endif
-    %endif
-    %if %%swap && %3 == 0 && %8 == 1
-        RUN_AVX_INSTR %1, %2, %3, %4, %5, %7, %6
-    %else
-        RUN_AVX_INSTR %1, %2, %3, %4, %5, %6, %7
-    %endif
-%endmacro
-
-;%1 == instruction
-;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
-;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
-%macro AVX_INSTR 4
-    %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
-        %ifidn %3, fnord
-            RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
-        %elifidn %4, fnord
-            RUN_AVX_INSTR1 %6, %7, %8, 3, %1, %2, %3, %9
-        %elifidn %5, fnord
-            RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
-        %else
-            RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
-        %endif
-    %endmacro
-%endmacro
-
-AVX_INSTR addpd, 1, 0, 1
-AVX_INSTR addps, 1, 0, 1
-AVX_INSTR addsd, 1, 0, 1
-AVX_INSTR addss, 1, 0, 1
-AVX_INSTR addsubpd, 1, 0, 0
-AVX_INSTR addsubps, 1, 0, 0
-AVX_INSTR andpd, 1, 0, 1
-AVX_INSTR andps, 1, 0, 1
-AVX_INSTR andnpd, 1, 0, 0
-AVX_INSTR andnps, 1, 0, 0
-AVX_INSTR blendpd, 1, 0, 0
-AVX_INSTR blendps, 1, 0, 0
-AVX_INSTR blendvpd, 1, 0, 0
-AVX_INSTR blendvps, 1, 0, 0
-AVX_INSTR cmppd, 1, 0, 0
-AVX_INSTR cmpps, 1, 0, 0
-AVX_INSTR cmpsd, 1, 0, 0
-AVX_INSTR cmpss, 1, 0, 0
-AVX_INSTR cvtdq2ps, 1, 0, 0
-AVX_INSTR cvtps2dq, 1, 0, 0
-AVX_INSTR divpd, 1, 0, 0
-AVX_INSTR divps, 1, 0, 0
-AVX_INSTR divsd, 1, 0, 0
-AVX_INSTR divss, 1, 0, 0
-AVX_INSTR dppd, 1, 1, 0
-AVX_INSTR dpps, 1, 1, 0
-AVX_INSTR haddpd, 1, 0, 0
-AVX_INSTR haddps, 1, 0, 0
-AVX_INSTR hsubpd, 1, 0, 0
-AVX_INSTR hsubps, 1, 0, 0
-AVX_INSTR maxpd, 1, 0, 1
-AVX_INSTR maxps, 1, 0, 1
-AVX_INSTR maxsd, 1, 0, 1
-AVX_INSTR maxss, 1, 0, 1
-AVX_INSTR minpd, 1, 0, 1
-AVX_INSTR minps, 1, 0, 1
-AVX_INSTR minsd, 1, 0, 1
-AVX_INSTR minss, 1, 0, 1
-AVX_INSTR movhlps, 1, 0, 0
-AVX_INSTR movlhps, 1, 0, 0
-AVX_INSTR movsd, 1, 0, 0
-AVX_INSTR movss, 1, 0, 0
-AVX_INSTR mpsadbw, 0, 1, 0
-AVX_INSTR mulpd, 1, 0, 1
-AVX_INSTR mulps, 1, 0, 1
-AVX_INSTR mulsd, 1, 0, 1
-AVX_INSTR mulss, 1, 0, 1
-AVX_INSTR orpd, 1, 0, 1
-AVX_INSTR orps, 1, 0, 1
-AVX_INSTR pabsb, 0, 0, 0
-AVX_INSTR pabsw, 0, 0, 0
-AVX_INSTR pabsd, 0, 0, 0
-AVX_INSTR packsswb, 0, 0, 0
-AVX_INSTR packssdw, 0, 0, 0
-AVX_INSTR packuswb, 0, 0, 0
-AVX_INSTR packusdw, 0, 0, 0
-AVX_INSTR paddb, 0, 0, 1
-AVX_INSTR paddw, 0, 0, 1
-AVX_INSTR paddd, 0, 0, 1
-AVX_INSTR paddq, 0, 0, 1
-AVX_INSTR paddsb, 0, 0, 1
-AVX_INSTR paddsw, 0, 0, 1
-AVX_INSTR paddusb, 0, 0, 1
-AVX_INSTR paddusw, 0, 0, 1
-AVX_INSTR palignr, 0, 1, 0
-AVX_INSTR pand, 0, 0, 1
-AVX_INSTR pandn, 0, 0, 0
-AVX_INSTR pavgb, 0, 0, 1
-AVX_INSTR pavgw, 0, 0, 1
-AVX_INSTR pblendvb, 0, 0, 0
-AVX_INSTR pblendw, 0, 1, 0
-AVX_INSTR pcmpestri, 0, 0, 0
-AVX_INSTR pcmpestrm, 0, 0, 0
-AVX_INSTR pcmpistri, 0, 0, 0
-AVX_INSTR pcmpistrm, 0, 0, 0
-AVX_INSTR pcmpeqb, 0, 0, 1
-AVX_INSTR pcmpeqw, 0, 0, 1
-AVX_INSTR pcmpeqd, 0, 0, 1
-AVX_INSTR pcmpeqq, 0, 0, 1
-AVX_INSTR pcmpgtb, 0, 0, 0
-AVX_INSTR pcmpgtw, 0, 0, 0
-AVX_INSTR pcmpgtd, 0, 0, 0
-AVX_INSTR pcmpgtq, 0, 0, 0
-AVX_INSTR phaddw, 0, 0, 0
-AVX_INSTR phaddd, 0, 0, 0
-AVX_INSTR phaddsw, 0, 0, 0
-AVX_INSTR phsubw, 0, 0, 0
-AVX_INSTR phsubd, 0, 0, 0
-AVX_INSTR phsubsw, 0, 0, 0
-AVX_INSTR pmaddwd, 0, 0, 1
-AVX_INSTR pmaddubsw, 0, 0, 0
-AVX_INSTR pmaxsb, 0, 0, 1
-AVX_INSTR pmaxsw, 0, 0, 1
-AVX_INSTR pmaxsd, 0, 0, 1
-AVX_INSTR pmaxub, 0, 0, 1
-AVX_INSTR pmaxuw, 0, 0, 1
-AVX_INSTR pmaxud, 0, 0, 1
-AVX_INSTR pminsb, 0, 0, 1
-AVX_INSTR pminsw, 0, 0, 1
-AVX_INSTR pminsd, 0, 0, 1
-AVX_INSTR pminub, 0, 0, 1
-AVX_INSTR pminuw, 0, 0, 1
-AVX_INSTR pminud, 0, 0, 1
-AVX_INSTR pmovmskb, 0, 0, 0
-AVX_INSTR pmulhuw, 0, 0, 1
-AVX_INSTR pmulhrsw, 0, 0, 1
-AVX_INSTR pmulhw, 0, 0, 1
-AVX_INSTR pmullw, 0, 0, 1
-AVX_INSTR pmulld, 0, 0, 1
-AVX_INSTR pmuludq, 0, 0, 1
-AVX_INSTR pmuldq, 0, 0, 1
-AVX_INSTR por, 0, 0, 1
-AVX_INSTR psadbw, 0, 0, 1
-AVX_INSTR pshufb, 0, 0, 0
-AVX_INSTR pshufd, 0, 1, 0
-AVX_INSTR pshufhw, 0, 1, 0
-AVX_INSTR pshuflw, 0, 1, 0
-AVX_INSTR psignb, 0, 0, 0
-AVX_INSTR psignw, 0, 0, 0
-AVX_INSTR psignd, 0, 0, 0
-AVX_INSTR psllw, 0, 0, 0
-AVX_INSTR pslld, 0, 0, 0
-AVX_INSTR psllq, 0, 0, 0
-AVX_INSTR pslldq, 0, 0, 0
-AVX_INSTR psraw, 0, 0, 0
-AVX_INSTR psrad, 0, 0, 0
-AVX_INSTR psrlw, 0, 0, 0
-AVX_INSTR psrld, 0, 0, 0
-AVX_INSTR psrlq, 0, 0, 0
-AVX_INSTR psrldq, 0, 0, 0
-AVX_INSTR psubb, 0, 0, 0
-AVX_INSTR psubw, 0, 0, 0
-AVX_INSTR psubd, 0, 0, 0
-AVX_INSTR psubq, 0, 0, 0
-AVX_INSTR psubsb, 0, 0, 0
-AVX_INSTR psubsw, 0, 0, 0
-AVX_INSTR psubusb, 0, 0, 0
-AVX_INSTR psubusw, 0, 0, 0
-AVX_INSTR ptest, 0, 0, 0
-AVX_INSTR punpckhbw, 0, 0, 0
-AVX_INSTR punpckhwd, 0, 0, 0
-AVX_INSTR punpckhdq, 0, 0, 0
-AVX_INSTR punpckhqdq, 0, 0, 0
-AVX_INSTR punpcklbw, 0, 0, 0
-AVX_INSTR punpcklwd, 0, 0, 0
-AVX_INSTR punpckldq, 0, 0, 0
-AVX_INSTR punpcklqdq, 0, 0, 0
-AVX_INSTR pxor, 0, 0, 1
-AVX_INSTR shufps, 1, 1, 0
-AVX_INSTR subpd, 1, 0, 0
-AVX_INSTR subps, 1, 0, 0
-AVX_INSTR subsd, 1, 0, 0
-AVX_INSTR subss, 1, 0, 0
-AVX_INSTR unpckhpd, 1, 0, 0
-AVX_INSTR unpckhps, 1, 0, 0
-AVX_INSTR unpcklpd, 1, 0, 0
-AVX_INSTR unpcklps, 1, 0, 0
-AVX_INSTR xorpd, 1, 0, 1
-AVX_INSTR xorps, 1, 0, 1
-
-; 3DNow instructions, for sharing code between AVX, SSE and 3DN
-AVX_INSTR pfadd, 1, 0, 1
-AVX_INSTR pfsub, 1, 0, 0
-AVX_INSTR pfmul, 1, 0, 1
-
-; base-4 constants for shuffles
-%assign i 0
-%rep 256
-    %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
-    %if j < 10
-        CAT_XDEFINE q000, j, i
-    %elif j < 100
-        CAT_XDEFINE q00, j, i
-    %elif j < 1000
-        CAT_XDEFINE q0, j, i
-    %else
-        CAT_XDEFINE q, j, i
-    %endif
-%assign i i+1
-%endrep
-%undef i
-%undef j
-
-%macro FMA_INSTR 3
-    %macro %1 4-7 %1, %2, %3
-        %if cpuflag(xop)
-            v%5 %1, %2, %3, %4
-        %else
-            %6 %1, %2, %3
-            %7 %1, %4
-        %endif
-    %endmacro
-%endmacro
-
-FMA_INSTR  pmacsdd,  pmulld, paddd
-FMA_INSTR  pmacsww,  pmullw, paddw
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
-; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
-; This lets us use tzcnt without bumping the yasm version requirement yet.
-%define tzcnt rep bsf
diff --git a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c b/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c
deleted file mode 100755
index 3712a3b6d6..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifdef _YUV_LIBYUV
-#include <libyuv.h>
-#include "yuv_util.h"
-#include "yuv_libyuv.h"
-
-void decodeRGB(struct TheoraPixelTransform* t)
-{
-	I420ToRAW(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 3, t->w, t->h);
-}
-
-void decodeRGBA(struct TheoraPixelTransform* t)
-{
-	I420ToABGR(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-	_decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeRGBX(struct TheoraPixelTransform* t)
-{
-    I420ToABGR(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void decodeARGB(struct TheoraPixelTransform* t)
-{
-    I420ToBGRA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-	_decodeAlpha(t, t->w * 4);
-}
-
-void decodeXRGB(struct TheoraPixelTransform* t)
-{
-    I420ToBGRA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void decodeBGR(struct TheoraPixelTransform* t)
-{
-	I420ToRGB24(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 3, t->w, t->h);
-}
-
-void decodeBGRA(struct TheoraPixelTransform* t)
-{
-	I420ToARGB(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-	_decodeAlpha(incOut(t, 3), t->w * 4);
-}
-
-void decodeBGRX(struct TheoraPixelTransform* t)
-{
-	I420ToARGB(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void decodeABGR(struct TheoraPixelTransform* t)
-{
-    I420ToRGBA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-	_decodeAlpha(t, t->w * 4);
-}
-
-void decodeXBGR(struct TheoraPixelTransform* t)
-{
-	I420ToRGBA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h);
-}
-
-void initYUVConversionModule()
-{
-
-}
-#endif
diff --git a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h b/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h
deleted file mode 100755
index f621af0c5f..0000000000
--- a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _YUV_LIBYUV_h
-#define _YUV_LIBYUV_h
-
-#include "TheoraPixelTransform.h"
-
-#endif
diff --git a/drivers/theoraplayer/src/YUV/yuv_util.c b/drivers/theoraplayer/src/YUV/yuv_util.c
deleted file mode 100644
index f5bf3e5f9e..0000000000
--- a/drivers/theoraplayer/src/YUV/yuv_util.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#include "yuv_util.h"
-
-struct TheoraPixelTransform* incOut(struct TheoraPixelTransform* t, int n)
-{
-	// used for XRGB, XBGR and similar
-	t->out += n;
-	return t;
-}
-
-void _decodeAlpha(struct TheoraPixelTransform* t, int stride)
-{
-	int width = t->w;
-	unsigned char *ySrc, *yLineEnd, *out;
-	int luma;
-	unsigned int y;
-	for (y = 0; y < t->h; y++)
-	{
-		ySrc = t->y + y * t->yStride + width;
-		out = t->out + y * stride;
-		
-		for (yLineEnd = ySrc + width; ySrc != yLineEnd; ++ySrc, out += 4)
-		{
-			luma = (*ySrc);
-            // because in YCbCr specification, luma values are in the range of [16, 235]
-            // account for 'footroom' and 'headroom' ranges while using luma values as alpha channel
-            if (luma <= 16)       *out = 0;
-            else if (luma >= 235) *out = 255;
-            else                  *out = (unsigned char) (((luma - 16) * 255) / 219);
-		}
-	}
-}
diff --git a/drivers/theoraplayer/src/YUV/yuv_util.h b/drivers/theoraplayer/src/YUV/yuv_util.h
deleted file mode 100644
index 1f9d76634a..0000000000
--- a/drivers/theoraplayer/src/YUV/yuv_util.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/************************************************************************************
-This source file is part of the Theora Video Playback Library
-For latest info, see http://libtheoraplayer.googlecode.com
-*************************************************************************************
-Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com)
-This program is free software; you can redistribute it and/or modify it under
-the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause
-*************************************************************************************/
-#ifndef _YUV_UTIL_h
-#define _YUV_UTIL_h
-
-#include "TheoraPixelTransform.h"
-
-struct TheoraPixelTransform* incOut(struct TheoraPixelTransform* t, int n);
-void _decodeAlpha(struct TheoraPixelTransform* t, int stride);
-
-#endif
diff --git a/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj b/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj
deleted file mode 100644
index 23f875fe0c..0000000000
--- a/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj
+++ /dev/null
@@ -1,2606 +0,0 @@
-// !$*UTF8*$!
-{
-	archiveVersion = 1;
-	classes = {
-	};
-	objectVersion = 46;
-	objects = {
-
-/* Begin PBXBuildFile section */
-		D139462D17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139462E17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139462F17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139463017C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139463117C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139463217C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139463317C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139463417C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D139463617C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463717C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463817C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463917C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463A17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463B17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463C17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463D17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D139463E17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; };
-		D13946C617C110670091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; };
-		D13946CC17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946CD17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946CE17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946CF17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946D017C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946D117C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946D217C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946D317C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946D417C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; };
-		D13946D517C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946D617C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946D717C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946D817C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946D917C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946DA17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946DB17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946DC17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D13946DD17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; };
-		D159BCB017C227F30030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB117C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB217C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB317C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB417C227F50030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB517C227F50030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB617C227F60030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB717C227F60030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB817C227F70030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; };
-		D159BCB917C228310030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCBA17C228320030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCBB17C228320030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCBC17C228330030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCBD17C228330030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCBE17C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCBF17C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCC017C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCC117C228350030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; };
-		D159BCC217C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCC317C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCC417C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCC517C2286E0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCC617C2286E0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCC717C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCC817C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCC917C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D159BCCA17C228700030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; };
-		D15D361017C386A600F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D15D361117C386A600F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D15D361217C386A700F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D15D361317C386B100F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D15D361517C386B300F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D15D361617C386B400F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D16775AB155C501D0050EC64 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D16775AC155C501D0050EC64 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D16775AD155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D16775AE155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D16775AF155C501D0050EC64 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D16775B0155C501D0050EC64 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D16775B1155C501D0050EC64 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D16775B2155C501D0050EC64 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D16775B3155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D16775B4155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D16775B5155C501D0050EC64 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D16775B6155C501D0050EC64 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D16775B7155C501D0050EC64 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D16775B8155C501D0050EC64 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D16775B9155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D16775BA155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D16775BB155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D16775BC155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D16775BD155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D16775BE155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D16775BF155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D16775C0155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D16775CE155C50280050EC64 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775CF155C50280050EC64 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
-		D16775D0155C50280050EC64 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775D1155C50280050EC64 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
-		D16775D2155C50280050EC64 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775D3155C50280050EC64 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
-		D16775D4155C50280050EC64 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775D5155C50280050EC64 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
-		D16775D6155C50280050EC64 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775D7155C50280050EC64 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
-		D16775D8155C50280050EC64 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775D9155C50280050EC64 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
-		D16775DA155C50280050EC64 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775DB155C50280050EC64 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
-		D16775DC155C50280050EC64 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775DD155C50280050EC64 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
-		D16775DE155C50280050EC64 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775DF155C50280050EC64 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
-		D16775E0155C50280050EC64 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775E1155C50280050EC64 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
-		D16775E2155C50280050EC64 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775E3155C50280050EC64 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
-		D16775E4155C50280050EC64 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775E5155C50280050EC64 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
-		D16775E6155C50280050EC64 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D16775E7155C50280050EC64 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
-		D198F952177A31FC002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D198F953177A31FC002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D198F954177A31FC002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D198F955177A31FC002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D198F956177A31FC002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D198F957177A31FC002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D198F958177A31FC002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D198F959177A31FC002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D198F95A177A31FC002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D198F95B177A31FC002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D198F95C177A31FC002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D198F95D177A31FC002942E3 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
-		D198F95F177A31FC002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D198F960177A31FC002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D198F961177A31FC002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D198F962177A31FC002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D198F965177A31FC002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
-		D198F966177A31FC002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
-		D198F967177A31FC002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
-		D198F968177A31FC002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
-		D198F969177A31FC002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
-		D198F96A177A31FC002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
-		D198F96B177A31FC002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
-		D198F96C177A31FC002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
-		D198F96D177A31FC002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
-		D198F96E177A31FC002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
-		D198F96F177A31FC002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
-		D198F970177A31FC002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
-		D198F971177A31FC002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
-		D198F972177A31FC002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D198F974177A31FC002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
-		D198F97E177A31FE002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D198F97F177A31FE002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D198F980177A31FE002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D198F981177A31FE002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D198F982177A31FE002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D198F983177A31FE002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D198F984177A31FE002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D198F985177A31FE002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D198F986177A31FE002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D198F987177A31FE002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D198F988177A31FE002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D198F989177A31FE002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
-		D198F98B177A31FE002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D198F98C177A31FE002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D198F98D177A31FE002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D198F98E177A31FE002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D198F991177A31FE002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
-		D198F992177A31FE002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
-		D198F993177A31FE002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
-		D198F994177A31FE002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
-		D198F995177A31FE002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
-		D198F996177A31FE002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
-		D198F997177A31FE002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
-		D198F998177A31FE002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
-		D198F999177A31FE002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
-		D198F99A177A31FE002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
-		D198F99B177A31FE002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
-		D198F99C177A31FE002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
-		D198F99D177A31FE002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
-		D198F99E177A31FE002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D198F9A0177A31FE002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
-		D198F9AA177A3200002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D198F9AB177A3200002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D198F9AC177A3200002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D198F9AD177A3200002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D198F9AE177A3200002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D198F9AF177A3200002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D198F9B0177A3200002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D198F9B1177A3200002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D198F9B2177A3200002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D198F9B3177A3200002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D198F9B4177A3200002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D198F9B5177A3200002942E3 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
-		D198F9B6177A3200002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
-		D198F9B8177A3200002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D198F9B9177A3200002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D198F9BA177A3200002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D198F9BB177A3200002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D198F9BE177A3200002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
-		D198F9BF177A3200002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
-		D198F9C0177A3200002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
-		D198F9C1177A3200002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
-		D198F9C2177A3200002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
-		D198F9C3177A3200002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
-		D198F9C4177A3200002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
-		D198F9C5177A3200002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
-		D198F9C6177A3200002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
-		D198F9C7177A3200002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
-		D198F9C8177A3200002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
-		D198F9C9177A3200002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
-		D198F9CA177A3200002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
-		D198F9CB177A3200002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D198F9CD177A3200002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
-		D1BCE05A18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE05B18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE05C18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE05D18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE05E18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE05F18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE06018F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE06118F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE06218F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; };
-		D1BCE06318F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06418F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06518F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06618F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06718F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06818F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06918F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06A18F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1BCE06B18F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; };
-		D1C3D07217C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07317C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07417C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07517C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07617C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07717C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07817C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07917C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D07A17C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; };
-		D1C3D08117C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; };
-		D1C3D08217C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; };
-		D1C3D08317C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; };
-		D1C3D08417C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D1C3D08517C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D1C3D08617C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D1C3D08717C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D1C3D08817C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D1C3D08917C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; };
-		D1C3D09617C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09717C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09817C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09917C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09A17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09B17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09C17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09D17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09E17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; };
-		D1C3D09F17C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A017C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A117C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A217C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A317C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A417C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A517C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A617C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0A717C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; };
-		D1C3D0C317C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0C417C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0C517C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0C617C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0C717C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0C817C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0C917C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0CA17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0CB17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; };
-		D1C3D0CC17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0CD17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0CE17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0CF17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0D017C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0D117C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0D217C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0D317C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0D417C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; };
-		D1C3D0D517C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0D617C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0D717C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0D817C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0D917C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0DA17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0DB17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0DC17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0DD17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; };
-		D1C3D0DE17C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0DF17C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E017C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E117C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E217C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E317C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E417C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E517C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E617C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; };
-		D1C3D0E717C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0E817C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0E917C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0EA17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0EB17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0EC17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0ED17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0EE17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D0EF17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; };
-		D1C3D10217C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10317C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10417C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10517C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10617C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10717C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10817C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10917C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D10A17C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; };
-		D1C3D12317C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; };
-		D1C3D12417C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; };
-		D1C3D12517C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; };
-		D1C3D12617C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12717C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12817C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12917C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12A17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12B17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12C17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12D17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12E17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; };
-		D1C3D12F17C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13017C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13117C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13217C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13317C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13417C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13517C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13617C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13717C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; };
-		D1C3D13817C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D13917C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D13A17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D13B17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D13C17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D13D17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D13E17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D13F17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D14017C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; };
-		D1C3D15017C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; };
-		D1C3D15117C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; };
-		D1C3D15217C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; };
-		D1C3D15317C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D1C3D15417C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D1C3D15517C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D1C3D15617C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D1C3D15717C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D1C3D15817C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; };
-		D1C3D17417C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; };
-		D1C3D17517C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; };
-		D1C3D17617C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; };
-		D1C3D17717C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17817C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17917C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17A17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17B17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17C17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17D17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17E17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D17F17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; };
-		D1C3D18F17C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; };
-		D1C3D19017C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; };
-		D1C3D19117C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; };
-		D1C3D19B17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D19C17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D19D17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D19E17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D19F17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D1A017C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D1A117C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D1A217C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1C3D1A317C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; };
-		D1CD00001696FC0B00609AB0 /* Theora.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFFB1696FC0100609AB0 /* Theora.framework */; };
-		D1CD00011696FC0B00609AB0 /* Vorbis.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF91696FBF700609AB0 /* Vorbis.framework */; };
-		D1CD00021696FC0B00609AB0 /* Ogg.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF71696FBF400609AB0 /* Ogg.framework */; };
-		D1CD00041696FF9400609AB0 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CD00031696FF9400609AB0 /* CoreMedia.framework */; };
-		D1CD00051696FF9600609AB0 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CD00031696FF9400609AB0 /* CoreMedia.framework */; };
-		D1CDFF241696C77A00609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D1CDFF251696C77A00609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D1CDFF261696C77A00609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D1CDFF271696C77A00609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D1CDFF281696C77A00609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D1CDFF291696C77A00609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D1CDFF2A1696C77A00609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D1CDFF2B1696C77A00609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D1CDFF2C1696C77A00609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D1CDFF2D1696C77A00609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D1CDFF2E1696C77A00609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D1CDFF341696C77A00609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF351696C77A00609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF361696C77A00609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF371696C77A00609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF381696C77A00609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF391696C77A00609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF3A1696C77A00609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF3B1696C77A00609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF3C1696C77A00609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF3D1696C77A00609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF3E1696C77A00609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF3F1696C77A00609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF401696C77A00609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF4C1696C79700609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D1CDFF4D1696C79700609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D1CDFF4E1696C79700609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D1CDFF4F1696C79700609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D1CDFF501696C79700609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D1CDFF511696C79700609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D1CDFF521696C79700609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D1CDFF531696C79700609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D1CDFF541696C79700609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D1CDFF551696C79700609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D1CDFF561696C79700609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D1CDFF5C1696C79700609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF5D1696C79700609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF5E1696C79700609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF5F1696C79700609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF601696C79700609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF611696C79700609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF621696C79700609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF631696C79700609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF641696C79700609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF651696C79700609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF661696C79700609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF671696C79700609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF681696C79700609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1CDFF961696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
-		D1CDFF971696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
-		D1CDFF981696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
-		D1CDFF991696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D1CDFF9A1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D1CDFF9B1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D1CDFF9E1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
-		D1CDFF9F1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */; };
-		D1CDFFA21696E1CA00609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D1CDFFA31696E1CA00609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D1CDFFA41696E1CA00609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D1CDFFA51696E1CA00609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D1CDFFA61696E1CA00609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D1CDFFA71696E1CA00609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D1CDFFA81696E1CA00609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D1CDFFA91696E1CA00609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D1CDFFAA1696E1CA00609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D1CDFFAB1696E1CA00609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D1CDFFAC1696E1CA00609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D1CDFFB01696E1CA00609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
-		D1CDFFB11696E1CA00609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
-		D1CDFFB21696E1CA00609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
-		D1CDFFB31696E1CA00609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
-		D1CDFFB41696E1CA00609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
-		D1CDFFB51696E1CA00609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
-		D1CDFFB61696E1CA00609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
-		D1CDFFB71696E1CA00609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
-		D1CDFFB81696E1CA00609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
-		D1CDFFB91696E1CA00609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
-		D1CDFFBA1696E1CA00609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
-		D1CDFFBB1696E1CA00609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
-		D1CDFFBC1696E1CA00609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
-		D1CDFFBD1696E1CA00609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D1CDFFC71696E1D700609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; };
-		D1CDFFC81696E1D700609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; };
-		D1CDFFC91696E1D700609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; };
-		D1CDFFCA1696E1D700609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; };
-		D1CDFFCB1696E1D700609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; };
-		D1CDFFCC1696E1D700609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; };
-		D1CDFFCD1696E1D700609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; };
-		D1CDFFCE1696E1D700609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; };
-		D1CDFFCF1696E1D700609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; };
-		D1CDFFD01696E1D700609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; };
-		D1CDFFD11696E1D700609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; };
-		D1CDFFD21696E1D700609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; };
-		D1CDFFD51696E1D700609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; };
-		D1CDFFD61696E1D700609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; };
-		D1CDFFD71696E1D700609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; };
-		D1CDFFD81696E1D700609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; };
-		D1CDFFD91696E1D700609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; };
-		D1CDFFDA1696E1D700609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; };
-		D1CDFFDB1696E1D700609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; };
-		D1CDFFDC1696E1D700609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; };
-		D1CDFFDD1696E1D700609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; };
-		D1CDFFDE1696E1D700609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; };
-		D1CDFFDF1696E1D700609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; };
-		D1CDFFE01696E1D700609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; };
-		D1CDFFE11696E1D700609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; };
-		D1CDFFE21696E1D700609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; };
-		D1CDFFEA1696E24B00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
-		D1CDFFEB1696E24C00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
-		D1CDFFEC1696E24F00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; };
-		D1CDFFEE1696FB7200609AB0 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */; };
-		D1CDFFF11696FB8900609AB0 /* CoreVideo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */; };
-		D1CDFFF31696FBA800609AB0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF21696FBA800609AB0 /* Foundation.framework */; };
-		D1CDFFF41696FBB200609AB0 /* CoreVideo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */; };
-		D1CDFFF51696FBB200609AB0 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */; };
-		D1CDFFF61696FBB200609AB0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF21696FBA800609AB0 /* Foundation.framework */; };
-		D1CDFFFD1696FC0800609AB0 /* Theora.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFFB1696FC0100609AB0 /* Theora.framework */; };
-		D1CDFFFE1696FC0800609AB0 /* Vorbis.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF91696FBF700609AB0 /* Vorbis.framework */; };
-		D1CDFFFF1696FC0800609AB0 /* Ogg.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF71696FBF400609AB0 /* Ogg.framework */; };
-		D1D465D616C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1D465D716C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1D465D816C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1D465DA16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D1D465DB16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D1D465DC16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D1D465DD16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D1D465DE16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D1D465DF16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; };
-		D1E2719916B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D1E2719A16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D1E2719B16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D1E2719C16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D1E2719D16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D1E2719E16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; };
-		D1E271A516B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D1E271A616B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D1E271A716B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D1E271A816B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D1E271A916B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D1E271AA16B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; };
-		D1E271AC16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D1E271AD16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D1E271AE16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D1E271AF16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D1E271B016B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D1E271B116B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; };
-		D1E271B316B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1E271B416B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1E271B516B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		D1E271B616B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
-		D1E271B716B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
-		D1E271B816B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; };
-		D1F09EB1169AFEFB00DEEC63 /* TheoraVideoClip_AVFoundation.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */; };
-/* End PBXBuildFile section */
-
-/* Begin PBXFileReference section */
-		D12CA55517734B4200412E5B /* TheoraVideoClip_FFmpeg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip_FFmpeg.cpp; path = src/FFmpeg/TheoraVideoClip_FFmpeg.cpp; sourceTree = "<group>"; };
-		D12CA55617734B4200412E5B /* TheoraVideoClip_FFmpeg.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_FFmpeg.h; path = src/FFmpeg/TheoraVideoClip_FFmpeg.h; sourceTree = "<group>"; };
-		D1358BC218D7777200A36FDC /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
-		D1358BC318D7777800A36FDC /* iOS.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = iOS.xcconfig; path = xcconfig/iOS.xcconfig; sourceTree = "<group>"; };
-		D1358BC418D7777800A36FDC /* Mac.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = Mac.xcconfig; path = xcconfig/Mac.xcconfig; sourceTree = "<group>"; };
-		D139462B17C0ED450091F4A4 /* yuv_libyuv.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv_libyuv.c; path = src/YUV/libyuv/yuv_libyuv.c; sourceTree = "<group>"; };
-		D139462C17C0ED450091F4A4 /* yuv_libyuv.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = yuv_libyuv.h; path = src/YUV/libyuv/yuv_libyuv.h; sourceTree = "<group>"; };
-		D13946CA17C119B30091F4A4 /* yuv_util.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv_util.c; path = src/YUV/yuv_util.c; sourceTree = "<group>"; };
-		D13946CB17C119B30091F4A4 /* yuv_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = yuv_util.h; path = src/YUV/yuv_util.h; sourceTree = "<group>"; };
-		D1473F2A150CA69B00B20490 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; };
-		D159BCAB17C227940030FAB6 /* compare_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = compare_win.cc; path = src/YUV/libyuv/src/compare_win.cc; sourceTree = "<group>"; };
-		D159BCAC17C227940030FAB6 /* row_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = row_win.cc; path = src/YUV/libyuv/src/row_win.cc; sourceTree = "<group>"; };
-		D159BCAD17C227940030FAB6 /* row_x86.asm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm.asm; name = row_x86.asm; path = src/YUV/libyuv/src/row_x86.asm; sourceTree = "<group>"; };
-		D159BCAE17C227940030FAB6 /* x86inc.asm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm.asm; name = x86inc.asm; path = src/YUV/libyuv/src/x86inc.asm; sourceTree = "<group>"; };
-		D167759E155C501D0050EC64 /* TheoraAsync.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAsync.cpp; path = src/TheoraAsync.cpp; sourceTree = "<group>"; };
-		D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAudioInterface.cpp; path = src/TheoraAudioInterface.cpp; sourceTree = "<group>"; };
-		D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraDataSource.cpp; path = src/TheoraDataSource.cpp; sourceTree = "<group>"; };
-		D16775A1155C501D0050EC64 /* TheoraException.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraException.cpp; path = src/TheoraException.cpp; sourceTree = "<group>"; };
-		D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraFrameQueue.cpp; path = src/TheoraFrameQueue.cpp; sourceTree = "<group>"; };
-		D16775A3155C501D0050EC64 /* TheoraTimer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraTimer.cpp; path = src/TheoraTimer.cpp; sourceTree = "<group>"; };
-		D16775A4155C501D0050EC64 /* TheoraUtil.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraUtil.cpp; path = src/TheoraUtil.cpp; sourceTree = "<group>"; };
-		D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip.cpp; path = src/TheoraVideoClip.cpp; sourceTree = "<group>"; };
-		D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoFrame.cpp; path = src/TheoraVideoFrame.cpp; sourceTree = "<group>"; };
-		D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoManager.cpp; path = src/TheoraVideoManager.cpp; sourceTree = "<group>"; };
-		D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraWorkerThread.cpp; path = src/TheoraWorkerThread.cpp; sourceTree = "<group>"; };
-		D16775C1155C50280050EC64 /* TheoraAsync.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAsync.h; path = include/theoraplayer/TheoraAsync.h; sourceTree = "<group>"; };
-		D16775C2155C50280050EC64 /* TheoraAudioInterface.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAudioInterface.h; path = include/theoraplayer/TheoraAudioInterface.h; sourceTree = "<group>"; };
-		D16775C3155C50280050EC64 /* TheoraDataSource.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraDataSource.h; path = include/theoraplayer/TheoraDataSource.h; sourceTree = "<group>"; };
-		D16775C4155C50280050EC64 /* TheoraException.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraException.h; path = include/theoraplayer/TheoraException.h; sourceTree = "<group>"; };
-		D16775C5155C50280050EC64 /* TheoraExport.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraExport.h; path = include/theoraplayer/TheoraExport.h; sourceTree = "<group>"; };
-		D16775C6155C50280050EC64 /* TheoraFrameQueue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraFrameQueue.h; path = include/theoraplayer/TheoraFrameQueue.h; sourceTree = "<group>"; };
-		D16775C7155C50280050EC64 /* TheoraPlayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraPlayer.h; path = include/theoraplayer/TheoraPlayer.h; sourceTree = "<group>"; };
-		D16775C8155C50280050EC64 /* TheoraTimer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraTimer.h; path = include/theoraplayer/TheoraTimer.h; sourceTree = "<group>"; };
-		D16775C9155C50280050EC64 /* TheoraUtil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraUtil.h; path = include/theoraplayer/TheoraUtil.h; sourceTree = "<group>"; };
-		D16775CA155C50280050EC64 /* TheoraVideoClip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip.h; path = include/theoraplayer/TheoraVideoClip.h; sourceTree = "<group>"; };
-		D16775CB155C50280050EC64 /* TheoraVideoFrame.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoFrame.h; path = include/theoraplayer/TheoraVideoFrame.h; sourceTree = "<group>"; };
-		D16775CC155C50280050EC64 /* TheoraVideoManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoManager.h; path = include/theoraplayer/TheoraVideoManager.h; sourceTree = "<group>"; };
-		D16775CD155C50280050EC64 /* TheoraWorkerThread.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraWorkerThread.h; path = include/theoraplayer/TheoraWorkerThread.h; sourceTree = "<group>"; };
-		D198F97B177A31FC002942E3 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
-		D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer_avfoundation.a; sourceTree = BUILT_PRODUCTS_DIR; };
-		D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer_theora_avfoundation.a; sourceTree = BUILT_PRODUCTS_DIR; };
-		D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
-		D1BCE05718F3F7D800C83470 /* scale_row.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale_row.h; path = src/YUV/libyuv/include/libyuv/scale_row.h; sourceTree = "<group>"; };
-		D1BCE05818F3F7FE00C83470 /* scale_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_common.cc; path = src/YUV/libyuv/src/scale_common.cc; sourceTree = "<group>"; };
-		D1BCE05918F3F7FE00C83470 /* scale_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_posix.cc; path = src/YUV/libyuv/src/scale_posix.cc; sourceTree = "<group>"; };
-		D1BCE06C18F3F80800C83470 /* scale_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = scale_win.cc; path = src/YUV/libyuv/src/scale_win.cc; sourceTree = "<group>"; };
-		D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_common.cc; path = src/YUV/libyuv/src/compare_common.cc; sourceTree = "<group>"; };
-		D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_neon.cc; path = src/YUV/libyuv/src/compare_neon.cc; sourceTree = "<group>"; };
-		D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_posix.cc; path = src/YUV/libyuv/src/compare_posix.cc; sourceTree = "<group>"; };
-		D1C3D05317C157CD00CA0FD2 /* compare.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare.cc; path = src/YUV/libyuv/src/compare.cc; sourceTree = "<group>"; };
-		D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_argb.cc; path = src/YUV/libyuv/src/convert_argb.cc; sourceTree = "<group>"; };
-		D1C3D05517C157CD00CA0FD2 /* convert_from_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_from_argb.cc; path = src/YUV/libyuv/src/convert_from_argb.cc; sourceTree = "<group>"; };
-		D1C3D05617C157CD00CA0FD2 /* convert_from.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_from.cc; path = src/YUV/libyuv/src/convert_from.cc; sourceTree = "<group>"; };
-		D1C3D05717C157CD00CA0FD2 /* convert_jpeg.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_jpeg.cc; path = src/YUV/libyuv/src/convert_jpeg.cc; sourceTree = "<group>"; };
-		D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_to_argb.cc; path = src/YUV/libyuv/src/convert_to_argb.cc; sourceTree = "<group>"; };
-		D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_to_i420.cc; path = src/YUV/libyuv/src/convert_to_i420.cc; sourceTree = "<group>"; };
-		D1C3D05A17C157CD00CA0FD2 /* convert.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert.cc; path = src/YUV/libyuv/src/convert.cc; sourceTree = "<group>"; };
-		D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = cpu_id.cc; path = src/YUV/libyuv/src/cpu_id.cc; sourceTree = "<group>"; };
-		D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = format_conversion.cc; path = src/YUV/libyuv/src/format_conversion.cc; sourceTree = "<group>"; };
-		D1C3D05D17C157CD00CA0FD2 /* mjpeg_decoder.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mjpeg_decoder.cc; path = src/YUV/libyuv/src/mjpeg_decoder.cc; sourceTree = "<group>"; };
-		D1C3D05E17C157CD00CA0FD2 /* mjpeg_validate.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mjpeg_validate.cc; path = src/YUV/libyuv/src/mjpeg_validate.cc; sourceTree = "<group>"; };
-		D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = planar_functions.cc; path = src/YUV/libyuv/src/planar_functions.cc; sourceTree = "<group>"; };
-		D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_argb.cc; path = src/YUV/libyuv/src/rotate_argb.cc; sourceTree = "<group>"; };
-		D1C3D06117C157CD00CA0FD2 /* rotate_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_mips.cc; path = src/YUV/libyuv/src/rotate_mips.cc; sourceTree = "<group>"; };
-		D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_neon.cc; path = src/YUV/libyuv/src/rotate_neon.cc; sourceTree = "<group>"; };
-		D1C3D06317C157CD00CA0FD2 /* rotate.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate.cc; path = src/YUV/libyuv/src/rotate.cc; sourceTree = "<group>"; };
-		D1C3D06417C157CD00CA0FD2 /* row_any.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_any.cc; path = src/YUV/libyuv/src/row_any.cc; sourceTree = "<group>"; };
-		D1C3D06517C157CD00CA0FD2 /* row_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_common.cc; path = src/YUV/libyuv/src/row_common.cc; sourceTree = "<group>"; };
-		D1C3D06617C157CD00CA0FD2 /* row_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_mips.cc; path = src/YUV/libyuv/src/row_mips.cc; sourceTree = "<group>"; };
-		D1C3D06717C157CD00CA0FD2 /* row_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_neon.cc; path = src/YUV/libyuv/src/row_neon.cc; sourceTree = "<group>"; };
-		D1C3D06817C157CD00CA0FD2 /* row_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_posix.cc; path = src/YUV/libyuv/src/row_posix.cc; sourceTree = "<group>"; };
-		D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_argb_neon.cc; path = src/YUV/libyuv/src/scale_argb_neon.cc; sourceTree = "<group>"; };
-		D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_argb.cc; path = src/YUV/libyuv/src/scale_argb.cc; sourceTree = "<group>"; };
-		D1C3D06D17C157CD00CA0FD2 /* scale_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_mips.cc; path = src/YUV/libyuv/src/scale_mips.cc; sourceTree = "<group>"; };
-		D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_neon.cc; path = src/YUV/libyuv/src/scale_neon.cc; sourceTree = "<group>"; };
-		D1C3D06F17C157CD00CA0FD2 /* scale.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale.cc; path = src/YUV/libyuv/src/scale.cc; sourceTree = "<group>"; };
-		D1C3D07017C157CD00CA0FD2 /* video_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = video_common.cc; path = src/YUV/libyuv/src/video_common.cc; sourceTree = "<group>"; };
-		D1C3D1CE17C15BB400CA0FD2 /* libyuv.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = libyuv.h; path = src/YUV/libyuv/include/libyuv.h; sourceTree = "<group>"; };
-		D1C3D1CF17C15BC100CA0FD2 /* basic_types.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = basic_types.h; path = src/YUV/libyuv/include/libyuv/basic_types.h; sourceTree = "<group>"; };
-		D1C3D1D017C15BC100CA0FD2 /* compare.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = compare.h; path = src/YUV/libyuv/include/libyuv/compare.h; sourceTree = "<group>"; };
-		D1C3D1D117C15BC100CA0FD2 /* convert_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_argb.h; path = src/YUV/libyuv/include/libyuv/convert_argb.h; sourceTree = "<group>"; };
-		D1C3D1D217C15BC100CA0FD2 /* convert_from_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_from_argb.h; path = src/YUV/libyuv/include/libyuv/convert_from_argb.h; sourceTree = "<group>"; };
-		D1C3D1D317C15BC100CA0FD2 /* convert_from.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_from.h; path = src/YUV/libyuv/include/libyuv/convert_from.h; sourceTree = "<group>"; };
-		D1C3D1D417C15BC100CA0FD2 /* convert.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert.h; path = src/YUV/libyuv/include/libyuv/convert.h; sourceTree = "<group>"; };
-		D1C3D1D517C15BC100CA0FD2 /* cpu_id.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = cpu_id.h; path = src/YUV/libyuv/include/libyuv/cpu_id.h; sourceTree = "<group>"; };
-		D1C3D1D617C15BC100CA0FD2 /* format_conversion.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = format_conversion.h; path = src/YUV/libyuv/include/libyuv/format_conversion.h; sourceTree = "<group>"; };
-		D1C3D1D717C15BC100CA0FD2 /* mjpeg_decoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = mjpeg_decoder.h; path = src/YUV/libyuv/include/libyuv/mjpeg_decoder.h; sourceTree = "<group>"; };
-		D1C3D1D817C15BC100CA0FD2 /* planar_functions.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = planar_functions.h; path = src/YUV/libyuv/include/libyuv/planar_functions.h; sourceTree = "<group>"; };
-		D1C3D1D917C15BC100CA0FD2 /* rotate_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = rotate_argb.h; path = src/YUV/libyuv/include/libyuv/rotate_argb.h; sourceTree = "<group>"; };
-		D1C3D1DA17C15BC100CA0FD2 /* rotate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = rotate.h; path = src/YUV/libyuv/include/libyuv/rotate.h; sourceTree = "<group>"; };
-		D1C3D1DB17C15BC100CA0FD2 /* row.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = row.h; path = src/YUV/libyuv/include/libyuv/row.h; sourceTree = "<group>"; };
-		D1C3D1DC17C15BC100CA0FD2 /* scale_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale_argb.h; path = src/YUV/libyuv/include/libyuv/scale_argb.h; sourceTree = "<group>"; };
-		D1C3D1DD17C15BC100CA0FD2 /* scale.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale.h; path = src/YUV/libyuv/include/libyuv/scale.h; sourceTree = "<group>"; };
-		D1C3D1DE17C15BC100CA0FD2 /* version.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = version.h; path = src/YUV/libyuv/include/libyuv/version.h; sourceTree = "<group>"; };
-		D1C3D1DF17C15BC100CA0FD2 /* video_common.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = video_common.h; path = src/YUV/libyuv/include/libyuv/video_common.h; sourceTree = "<group>"; };
-		D1CD00031696FF9400609AB0 /* CoreMedia.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreMedia.framework; path = System/Library/Frameworks/CoreMedia.framework; sourceTree = SDKROOT; };
-		D1CDFF481696C77A00609AB0 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; };
-		D1CDFF701696C79700609AB0 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; };
-		D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip_Theora.cpp; path = src/Theora/TheoraVideoClip_Theora.cpp; sourceTree = "<group>"; };
-		D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_Theora.h; path = src/Theora/TheoraVideoClip_Theora.h; sourceTree = "<group>"; };
-		D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = TheoraVideoClip_AVFoundation.mm; path = src/AVFoundation/TheoraVideoClip_AVFoundation.mm; sourceTree = "<group>"; };
-		D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_AVFoundation.h; path = src/AVFoundation/TheoraVideoClip_AVFoundation.h; sourceTree = "<group>"; };
-		D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
-		D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; };
-		D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; };
-		D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreVideo.framework; path = System/Library/Frameworks/CoreVideo.framework; sourceTree = SDKROOT; };
-		D1CDFFF21696FBA800609AB0 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
-		D1CDFFF71696FBF400609AB0 /* Ogg.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Ogg.framework; path = ../__build__/products/Debug/Ogg.framework; sourceTree = "<group>"; };
-		D1CDFFF91696FBF700609AB0 /* Vorbis.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Vorbis.framework; path = ../__build__/products/Debug/Vorbis.framework; sourceTree = "<group>"; };
-		D1CDFFFB1696FC0100609AB0 /* Theora.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Theora.framework; path = ../__build__/products/Debug/Theora.framework; sourceTree = "<group>"; };
-		D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAudioPacketQueue.h; path = include/theoraplayer/TheoraAudioPacketQueue.h; sourceTree = "<group>"; };
-		D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAudioPacketQueue.cpp; path = src/TheoraAudioPacketQueue.cpp; sourceTree = "<group>"; };
-		D1E2718A16B46F640046C00C /* yuv420_grey_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_grey_c.c; path = src/YUV/C/yuv420_grey_c.c; sourceTree = "<group>"; };
-		D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_yuv_c.c; path = src/YUV/C/yuv420_yuv_c.c; sourceTree = "<group>"; };
-		D1E271AB16B470210046C00C /* yuv420_rgb_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_rgb_c.c; path = src/YUV/C/yuv420_rgb_c.c; sourceTree = "<group>"; };
-		D1E271B216B471E80046C00C /* TheoraPixelTransform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraPixelTransform.h; path = include/theoraplayer/TheoraPixelTransform.h; sourceTree = "<group>"; };
-		D1F4DA1D18FECACE007C1968 /* cpu-features.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "cpu-features.c"; path = "src/YUV/android/cpu-features.c"; sourceTree = "<group>"; };
-		D1F4DA1E18FECACE007C1968 /* cpu-features.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "cpu-features.h"; path = "src/YUV/android/cpu-features.h"; sourceTree = "<group>"; };
-/* End PBXFileReference section */
-
-/* Begin PBXFrameworksBuildPhase section */
-		D1473F26150CA69B00B20490 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CD00001696FC0B00609AB0 /* Theora.framework in Frameworks */,
-				D1CD00011696FC0B00609AB0 /* Vorbis.framework in Frameworks */,
-				D1CD00021696FC0B00609AB0 /* Ogg.framework in Frameworks */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F963177A31FC002942E3 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F98F177A31FE002942E3 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F9BC177A3200002942E3 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1BB6FAB150E9E7100EF9400 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFF2F1696C77A00609AB0 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFFF41696FBB200609AB0 /* CoreVideo.framework in Frameworks */,
-				D1CDFFF51696FBB200609AB0 /* AVFoundation.framework in Frameworks */,
-				D1CDFFF61696FBB200609AB0 /* Foundation.framework in Frameworks */,
-				D1CD00051696FF9600609AB0 /* CoreMedia.framework in Frameworks */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFF571696C79700609AB0 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CD00041696FF9400609AB0 /* CoreMedia.framework in Frameworks */,
-				D1CDFFF31696FBA800609AB0 /* Foundation.framework in Frameworks */,
-				D1CDFFF11696FB8900609AB0 /* CoreVideo.framework in Frameworks */,
-				D1CDFFEE1696FB7200609AB0 /* AVFoundation.framework in Frameworks */,
-				D1CDFFFD1696FC0800609AB0 /* Theora.framework in Frameworks */,
-				D1CDFFFE1696FC0800609AB0 /* Vorbis.framework in Frameworks */,
-				D1CDFFFF1696FC0800609AB0 /* Ogg.framework in Frameworks */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFFAE1696E1CA00609AB0 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFFD31696E1D700609AB0 /* Frameworks */ = {
-			isa = PBXFrameworksBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-/* End PBXFrameworksBuildPhase section */
-
-/* Begin PBXGroup section */
-		D12CA55417734B2400412E5B /* FFmpeg */ = {
-			isa = PBXGroup;
-			children = (
-				D12CA55517734B4200412E5B /* TheoraVideoClip_FFmpeg.cpp */,
-				D12CA55617734B4200412E5B /* TheoraVideoClip_FFmpeg.h */,
-			);
-			name = FFmpeg;
-			sourceTree = "<group>";
-		};
-		D1358BC118D7776700A36FDC /* config */ = {
-			isa = PBXGroup;
-			children = (
-				D1358BC318D7777800A36FDC /* iOS.xcconfig */,
-				D1358BC418D7777800A36FDC /* Mac.xcconfig */,
-				D1358BC218D7777200A36FDC /* Info.plist */,
-			);
-			name = config;
-			sourceTree = "<group>";
-		};
-		D139462A17C0ED2F0091F4A4 /* libyuv */ = {
-			isa = PBXGroup;
-			children = (
-				D1C3D04E17C157AC00CA0FD2 /* include */,
-				D1C3D04D17C157A800CA0FD2 /* src */,
-				D139462B17C0ED450091F4A4 /* yuv_libyuv.c */,
-				D139462C17C0ED450091F4A4 /* yuv_libyuv.h */,
-			);
-			name = libyuv;
-			sourceTree = "<group>";
-		};
-		D1473F1E150CA69B00B20490 = {
-			isa = PBXGroup;
-			children = (
-				D1358BC118D7776700A36FDC /* config */,
-				D147401F150CAE9600B20490 /* include */,
-				D1473F42150CA6C000B20490 /* src */,
-				D1473F2C150CA69B00B20490 /* Frameworks */,
-				D1473F2B150CA69B00B20490 /* Products */,
-			);
-			sourceTree = "<group>";
-		};
-		D1473F2B150CA69B00B20490 /* Products */ = {
-			isa = PBXGroup;
-			children = (
-				D1473F2A150CA69B00B20490 /* theoraplayer.framework */,
-				D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */,
-				D1CDFF481696C77A00609AB0 /* theoraplayer.framework */,
-				D1CDFF701696C79700609AB0 /* theoraplayer.framework */,
-				D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */,
-				D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */,
-				D198F97B177A31FC002942E3 /* libtheoraplayer.a */,
-				D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */,
-				D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */,
-			);
-			name = Products;
-			sourceTree = "<group>";
-		};
-		D1473F2C150CA69B00B20490 /* Frameworks */ = {
-			isa = PBXGroup;
-			children = (
-				D1473F82150CA7F300B20490 /* mac */,
-			);
-			name = Frameworks;
-			sourceTree = "<group>";
-		};
-		D1473F42150CA6C000B20490 /* src */ = {
-			isa = PBXGroup;
-			children = (
-				D1E2718516B46F370046C00C /* YUV */,
-				D1CDFF921696CEFA00609AB0 /* Theora */,
-				D1CDFF931696CF0000609AB0 /* AVFoundation */,
-				D12CA55417734B2400412E5B /* FFmpeg */,
-				D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */,
-				D167759E155C501D0050EC64 /* TheoraAsync.cpp */,
-				D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */,
-				D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */,
-				D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */,
-				D16775A1155C501D0050EC64 /* TheoraException.cpp */,
-				D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */,
-				D16775A3155C501D0050EC64 /* TheoraTimer.cpp */,
-				D16775A4155C501D0050EC64 /* TheoraUtil.cpp */,
-				D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */,
-				D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */,
-				D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */,
-			);
-			name = src;
-			sourceTree = "<group>";
-		};
-		D1473F82150CA7F300B20490 /* mac */ = {
-			isa = PBXGroup;
-			children = (
-				D1CD00031696FF9400609AB0 /* CoreMedia.framework */,
-				D1CDFFFB1696FC0100609AB0 /* Theora.framework */,
-				D1CDFFF91696FBF700609AB0 /* Vorbis.framework */,
-				D1CDFFF71696FBF400609AB0 /* Ogg.framework */,
-				D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */,
-				D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */,
-				D1CDFFF21696FBA800609AB0 /* Foundation.framework */,
-			);
-			name = mac;
-			sourceTree = "<group>";
-		};
-		D147401F150CAE9600B20490 /* include */ = {
-			isa = PBXGroup;
-			children = (
-				D16775C1155C50280050EC64 /* TheoraAsync.h */,
-				D16775C2155C50280050EC64 /* TheoraAudioInterface.h */,
-				D16775C3155C50280050EC64 /* TheoraDataSource.h */,
-				D16775C4155C50280050EC64 /* TheoraException.h */,
-				D16775C5155C50280050EC64 /* TheoraExport.h */,
-				D1E271B216B471E80046C00C /* TheoraPixelTransform.h */,
-				D16775CB155C50280050EC64 /* TheoraVideoFrame.h */,
-				D16775C6155C50280050EC64 /* TheoraFrameQueue.h */,
-				D16775C7155C50280050EC64 /* TheoraPlayer.h */,
-				D16775C8155C50280050EC64 /* TheoraTimer.h */,
-				D16775C9155C50280050EC64 /* TheoraUtil.h */,
-				D16775CA155C50280050EC64 /* TheoraVideoClip.h */,
-				D16775CC155C50280050EC64 /* TheoraVideoManager.h */,
-				D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */,
-				D16775CD155C50280050EC64 /* TheoraWorkerThread.h */,
-			);
-			name = include;
-			sourceTree = "<group>";
-		};
-		D1C3D04D17C157A800CA0FD2 /* src */ = {
-			isa = PBXGroup;
-			children = (
-				D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */,
-				D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */,
-				D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */,
-				D159BCAB17C227940030FAB6 /* compare_win.cc */,
-				D1C3D05317C157CD00CA0FD2 /* compare.cc */,
-				D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */,
-				D1C3D05517C157CD00CA0FD2 /* convert_from_argb.cc */,
-				D1C3D05617C157CD00CA0FD2 /* convert_from.cc */,
-				D1C3D05717C157CD00CA0FD2 /* convert_jpeg.cc */,
-				D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */,
-				D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */,
-				D1C3D05A17C157CD00CA0FD2 /* convert.cc */,
-				D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */,
-				D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */,
-				D1C3D05D17C157CD00CA0FD2 /* mjpeg_decoder.cc */,
-				D1C3D05E17C157CD00CA0FD2 /* mjpeg_validate.cc */,
-				D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */,
-				D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */,
-				D1C3D06117C157CD00CA0FD2 /* rotate_mips.cc */,
-				D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */,
-				D1C3D06317C157CD00CA0FD2 /* rotate.cc */,
-				D1C3D06417C157CD00CA0FD2 /* row_any.cc */,
-				D1C3D06517C157CD00CA0FD2 /* row_common.cc */,
-				D1C3D06617C157CD00CA0FD2 /* row_mips.cc */,
-				D1C3D06717C157CD00CA0FD2 /* row_neon.cc */,
-				D1C3D06817C157CD00CA0FD2 /* row_posix.cc */,
-				D159BCAC17C227940030FAB6 /* row_win.cc */,
-				D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */,
-				D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */,
-				D1C3D06D17C157CD00CA0FD2 /* scale_mips.cc */,
-				D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */,
-				D1BCE06C18F3F80800C83470 /* scale_win.cc */,
-				D1C3D06F17C157CD00CA0FD2 /* scale.cc */,
-				D1BCE05818F3F7FE00C83470 /* scale_common.cc */,
-				D1BCE05918F3F7FE00C83470 /* scale_posix.cc */,
-				D1C3D07017C157CD00CA0FD2 /* video_common.cc */,
-				D159BCAD17C227940030FAB6 /* row_x86.asm */,
-				D159BCAE17C227940030FAB6 /* x86inc.asm */,
-			);
-			name = src;
-			sourceTree = "<group>";
-		};
-		D1C3D04E17C157AC00CA0FD2 /* include */ = {
-			isa = PBXGroup;
-			children = (
-				D1C3D1CD17C15BA900CA0FD2 /* libyuv */,
-				D1C3D1CE17C15BB400CA0FD2 /* libyuv.h */,
-			);
-			name = include;
-			sourceTree = "<group>";
-		};
-		D1C3D1CD17C15BA900CA0FD2 /* libyuv */ = {
-			isa = PBXGroup;
-			children = (
-				D1C3D1CF17C15BC100CA0FD2 /* basic_types.h */,
-				D1C3D1D017C15BC100CA0FD2 /* compare.h */,
-				D1C3D1D117C15BC100CA0FD2 /* convert_argb.h */,
-				D1C3D1D217C15BC100CA0FD2 /* convert_from_argb.h */,
-				D1C3D1D317C15BC100CA0FD2 /* convert_from.h */,
-				D1C3D1D417C15BC100CA0FD2 /* convert.h */,
-				D1C3D1D517C15BC100CA0FD2 /* cpu_id.h */,
-				D1C3D1D617C15BC100CA0FD2 /* format_conversion.h */,
-				D1C3D1D717C15BC100CA0FD2 /* mjpeg_decoder.h */,
-				D1C3D1D817C15BC100CA0FD2 /* planar_functions.h */,
-				D1C3D1D917C15BC100CA0FD2 /* rotate_argb.h */,
-				D1C3D1DA17C15BC100CA0FD2 /* rotate.h */,
-				D1C3D1DB17C15BC100CA0FD2 /* row.h */,
-				D1C3D1DC17C15BC100CA0FD2 /* scale_argb.h */,
-				D1BCE05718F3F7D800C83470 /* scale_row.h */,
-				D1C3D1DD17C15BC100CA0FD2 /* scale.h */,
-				D1C3D1DE17C15BC100CA0FD2 /* version.h */,
-				D1C3D1DF17C15BC100CA0FD2 /* video_common.h */,
-			);
-			name = libyuv;
-			sourceTree = "<group>";
-		};
-		D1CDFF921696CEFA00609AB0 /* Theora */ = {
-			isa = PBXGroup;
-			children = (
-				D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */,
-				D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */,
-			);
-			name = Theora;
-			sourceTree = "<group>";
-		};
-		D1CDFF931696CF0000609AB0 /* AVFoundation */ = {
-			isa = PBXGroup;
-			children = (
-				D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */,
-				D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */,
-			);
-			name = AVFoundation;
-			sourceTree = "<group>";
-		};
-		D1E2718516B46F370046C00C /* YUV */ = {
-			isa = PBXGroup;
-			children = (
-				D1F4DA1C18FECABC007C1968 /* android */,
-				D139462A17C0ED2F0091F4A4 /* libyuv */,
-				D1E2718716B46F4F0046C00C /* C */,
-				D13946CA17C119B30091F4A4 /* yuv_util.c */,
-				D13946CB17C119B30091F4A4 /* yuv_util.h */,
-			);
-			name = YUV;
-			sourceTree = "<group>";
-		};
-		D1E2718716B46F4F0046C00C /* C */ = {
-			isa = PBXGroup;
-			children = (
-				D1E271AB16B470210046C00C /* yuv420_rgb_c.c */,
-				D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */,
-				D1E2718A16B46F640046C00C /* yuv420_grey_c.c */,
-			);
-			name = C;
-			sourceTree = "<group>";
-		};
-		D1F4DA1C18FECABC007C1968 /* android */ = {
-			isa = PBXGroup;
-			children = (
-				D1F4DA1D18FECACE007C1968 /* cpu-features.c */,
-				D1F4DA1E18FECACE007C1968 /* cpu-features.h */,
-			);
-			name = android;
-			sourceTree = "<group>";
-		};
-/* End PBXGroup section */
-
-/* Begin PBXHeadersBuildPhase section */
-		D1473F27150CA69B00B20490 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D16775CE155C50280050EC64 /* TheoraAsync.h in Headers */,
-				D16775D0155C50280050EC64 /* TheoraAudioInterface.h in Headers */,
-				D16775D2155C50280050EC64 /* TheoraDataSource.h in Headers */,
-				D16775D4155C50280050EC64 /* TheoraException.h in Headers */,
-				D16775D6155C50280050EC64 /* TheoraExport.h in Headers */,
-				D16775D8155C50280050EC64 /* TheoraFrameQueue.h in Headers */,
-				D16775DA155C50280050EC64 /* TheoraPlayer.h in Headers */,
-				D16775DC155C50280050EC64 /* TheoraTimer.h in Headers */,
-				D16775DE155C50280050EC64 /* TheoraUtil.h in Headers */,
-				D16775E0155C50280050EC64 /* TheoraVideoClip.h in Headers */,
-				D16775E2155C50280050EC64 /* TheoraVideoFrame.h in Headers */,
-				D16775E4155C50280050EC64 /* TheoraVideoManager.h in Headers */,
-				D16775E6155C50280050EC64 /* TheoraWorkerThread.h in Headers */,
-				D1D465D616C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */,
-				D1E271B316B471E80046C00C /* TheoraPixelTransform.h in Headers */,
-				D1CDFF991696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */,
-				D139463617C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946D517C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F964177A31FC002942E3 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D198F965177A31FC002942E3 /* TheoraAsync.h in Headers */,
-				D198F966177A31FC002942E3 /* TheoraAudioInterface.h in Headers */,
-				D198F967177A31FC002942E3 /* TheoraDataSource.h in Headers */,
-				D198F968177A31FC002942E3 /* TheoraException.h in Headers */,
-				D198F969177A31FC002942E3 /* TheoraExport.h in Headers */,
-				D198F96A177A31FC002942E3 /* TheoraFrameQueue.h in Headers */,
-				D198F96B177A31FC002942E3 /* TheoraPlayer.h in Headers */,
-				D198F96C177A31FC002942E3 /* TheoraTimer.h in Headers */,
-				D198F96D177A31FC002942E3 /* TheoraUtil.h in Headers */,
-				D198F96E177A31FC002942E3 /* TheoraVideoClip.h in Headers */,
-				D198F96F177A31FC002942E3 /* TheoraVideoFrame.h in Headers */,
-				D198F970177A31FC002942E3 /* TheoraVideoManager.h in Headers */,
-				D198F971177A31FC002942E3 /* TheoraWorkerThread.h in Headers */,
-				D198F972177A31FC002942E3 /* TheoraVideoClip_Theora.h in Headers */,
-				D198F974177A31FC002942E3 /* TheoraPixelTransform.h in Headers */,
-				D139463917C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946D817C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F990177A31FE002942E3 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D198F991177A31FE002942E3 /* TheoraAsync.h in Headers */,
-				D198F992177A31FE002942E3 /* TheoraAudioInterface.h in Headers */,
-				D198F993177A31FE002942E3 /* TheoraDataSource.h in Headers */,
-				D198F994177A31FE002942E3 /* TheoraException.h in Headers */,
-				D198F995177A31FE002942E3 /* TheoraExport.h in Headers */,
-				D198F996177A31FE002942E3 /* TheoraFrameQueue.h in Headers */,
-				D198F997177A31FE002942E3 /* TheoraPlayer.h in Headers */,
-				D198F998177A31FE002942E3 /* TheoraTimer.h in Headers */,
-				D198F999177A31FE002942E3 /* TheoraUtil.h in Headers */,
-				D198F99A177A31FE002942E3 /* TheoraVideoClip.h in Headers */,
-				D198F99B177A31FE002942E3 /* TheoraVideoFrame.h in Headers */,
-				D198F99C177A31FE002942E3 /* TheoraVideoManager.h in Headers */,
-				D198F99D177A31FE002942E3 /* TheoraWorkerThread.h in Headers */,
-				D198F99E177A31FE002942E3 /* TheoraVideoClip_Theora.h in Headers */,
-				D198F9A0177A31FE002942E3 /* TheoraPixelTransform.h in Headers */,
-				D139463A17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946D917C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F9BD177A3200002942E3 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D198F9BE177A3200002942E3 /* TheoraAsync.h in Headers */,
-				D198F9BF177A3200002942E3 /* TheoraAudioInterface.h in Headers */,
-				D198F9C0177A3200002942E3 /* TheoraDataSource.h in Headers */,
-				D198F9C1177A3200002942E3 /* TheoraException.h in Headers */,
-				D198F9C2177A3200002942E3 /* TheoraExport.h in Headers */,
-				D198F9C3177A3200002942E3 /* TheoraFrameQueue.h in Headers */,
-				D198F9C4177A3200002942E3 /* TheoraPlayer.h in Headers */,
-				D198F9C5177A3200002942E3 /* TheoraTimer.h in Headers */,
-				D198F9C6177A3200002942E3 /* TheoraUtil.h in Headers */,
-				D198F9C7177A3200002942E3 /* TheoraVideoClip.h in Headers */,
-				D198F9C8177A3200002942E3 /* TheoraVideoFrame.h in Headers */,
-				D198F9C9177A3200002942E3 /* TheoraVideoManager.h in Headers */,
-				D198F9CA177A3200002942E3 /* TheoraWorkerThread.h in Headers */,
-				D198F9CB177A3200002942E3 /* TheoraVideoClip_Theora.h in Headers */,
-				D198F9CD177A3200002942E3 /* TheoraPixelTransform.h in Headers */,
-				D139463B17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946DA17C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1BB6FAC150E9E7100EF9400 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D16775CF155C50280050EC64 /* TheoraAsync.h in Headers */,
-				D16775D1155C50280050EC64 /* TheoraAudioInterface.h in Headers */,
-				D16775D3155C50280050EC64 /* TheoraDataSource.h in Headers */,
-				D16775D5155C50280050EC64 /* TheoraException.h in Headers */,
-				D16775D7155C50280050EC64 /* TheoraExport.h in Headers */,
-				D16775D9155C50280050EC64 /* TheoraFrameQueue.h in Headers */,
-				D16775DB155C50280050EC64 /* TheoraPlayer.h in Headers */,
-				D16775DD155C50280050EC64 /* TheoraTimer.h in Headers */,
-				D16775DF155C50280050EC64 /* TheoraUtil.h in Headers */,
-				D16775E1155C50280050EC64 /* TheoraVideoClip.h in Headers */,
-				D16775E3155C50280050EC64 /* TheoraVideoFrame.h in Headers */,
-				D16775E5155C50280050EC64 /* TheoraVideoManager.h in Headers */,
-				D16775E7155C50280050EC64 /* TheoraWorkerThread.h in Headers */,
-				D1CDFF9B1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */,
-				D1E271B616B471E80046C00C /* TheoraPixelTransform.h in Headers */,
-				D139463C17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946DB17C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFF331696C77A00609AB0 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFF341696C77A00609AB0 /* TheoraAsync.h in Headers */,
-				D1CDFF351696C77A00609AB0 /* TheoraAudioInterface.h in Headers */,
-				D1CDFF361696C77A00609AB0 /* TheoraDataSource.h in Headers */,
-				D1CDFF371696C77A00609AB0 /* TheoraException.h in Headers */,
-				D1CDFF381696C77A00609AB0 /* TheoraExport.h in Headers */,
-				D1CDFF391696C77A00609AB0 /* TheoraFrameQueue.h in Headers */,
-				D1CDFF3A1696C77A00609AB0 /* TheoraPlayer.h in Headers */,
-				D1CDFF3B1696C77A00609AB0 /* TheoraTimer.h in Headers */,
-				D1CDFF3C1696C77A00609AB0 /* TheoraUtil.h in Headers */,
-				D1CDFF3D1696C77A00609AB0 /* TheoraVideoClip.h in Headers */,
-				D1CDFF3E1696C77A00609AB0 /* TheoraVideoFrame.h in Headers */,
-				D1CDFF3F1696C77A00609AB0 /* TheoraVideoManager.h in Headers */,
-				D1CDFF401696C77A00609AB0 /* TheoraWorkerThread.h in Headers */,
-				D1E271B416B471E80046C00C /* TheoraPixelTransform.h in Headers */,
-				D1D465D716C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */,
-				D1CDFF9F1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h in Headers */,
-				D139463717C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946D617C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFF5B1696C79700609AB0 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFF5C1696C79700609AB0 /* TheoraAsync.h in Headers */,
-				D1CDFF5D1696C79700609AB0 /* TheoraAudioInterface.h in Headers */,
-				D1CDFF5E1696C79700609AB0 /* TheoraDataSource.h in Headers */,
-				D1CDFF5F1696C79700609AB0 /* TheoraException.h in Headers */,
-				D1CDFF601696C79700609AB0 /* TheoraExport.h in Headers */,
-				D1CDFF611696C79700609AB0 /* TheoraFrameQueue.h in Headers */,
-				D1CDFF621696C79700609AB0 /* TheoraPlayer.h in Headers */,
-				D1CDFF631696C79700609AB0 /* TheoraTimer.h in Headers */,
-				D1CDFF641696C79700609AB0 /* TheoraUtil.h in Headers */,
-				D1CDFF651696C79700609AB0 /* TheoraVideoClip.h in Headers */,
-				D1CDFF661696C79700609AB0 /* TheoraVideoFrame.h in Headers */,
-				D1CDFF671696C79700609AB0 /* TheoraVideoManager.h in Headers */,
-				D1CDFF681696C79700609AB0 /* TheoraWorkerThread.h in Headers */,
-				D1E271B516B471E80046C00C /* TheoraPixelTransform.h in Headers */,
-				D1D465D816C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */,
-				D1CDFF9A1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */,
-				D1F09EB1169AFEFB00DEEC63 /* TheoraVideoClip_AVFoundation.h in Headers */,
-				D139463817C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946D717C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFFAF1696E1CA00609AB0 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFFB01696E1CA00609AB0 /* TheoraAsync.h in Headers */,
-				D1CDFFB11696E1CA00609AB0 /* TheoraAudioInterface.h in Headers */,
-				D1CDFFB21696E1CA00609AB0 /* TheoraDataSource.h in Headers */,
-				D1CDFFB31696E1CA00609AB0 /* TheoraException.h in Headers */,
-				D1CDFFB41696E1CA00609AB0 /* TheoraExport.h in Headers */,
-				D1CDFFB51696E1CA00609AB0 /* TheoraFrameQueue.h in Headers */,
-				D1CDFFB61696E1CA00609AB0 /* TheoraPlayer.h in Headers */,
-				D1CDFFB71696E1CA00609AB0 /* TheoraTimer.h in Headers */,
-				D1CDFFB81696E1CA00609AB0 /* TheoraUtil.h in Headers */,
-				D1CDFFB91696E1CA00609AB0 /* TheoraVideoClip.h in Headers */,
-				D1CDFFBA1696E1CA00609AB0 /* TheoraVideoFrame.h in Headers */,
-				D1CDFFBB1696E1CA00609AB0 /* TheoraVideoManager.h in Headers */,
-				D1CDFFBC1696E1CA00609AB0 /* TheoraWorkerThread.h in Headers */,
-				D1CDFFBD1696E1CA00609AB0 /* TheoraVideoClip_Theora.h in Headers */,
-				D1E271B716B471E80046C00C /* TheoraPixelTransform.h in Headers */,
-				D139463D17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946DC17C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFFD41696E1D700609AB0 /* Headers */ = {
-			isa = PBXHeadersBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFFD51696E1D700609AB0 /* TheoraAsync.h in Headers */,
-				D1CDFFD61696E1D700609AB0 /* TheoraAudioInterface.h in Headers */,
-				D1CDFFD71696E1D700609AB0 /* TheoraDataSource.h in Headers */,
-				D1CDFFD81696E1D700609AB0 /* TheoraException.h in Headers */,
-				D1CDFFD91696E1D700609AB0 /* TheoraExport.h in Headers */,
-				D1CDFFDA1696E1D700609AB0 /* TheoraFrameQueue.h in Headers */,
-				D1CDFFDB1696E1D700609AB0 /* TheoraPlayer.h in Headers */,
-				D1CDFFDC1696E1D700609AB0 /* TheoraTimer.h in Headers */,
-				D1CDFFDD1696E1D700609AB0 /* TheoraUtil.h in Headers */,
-				D1CDFFDE1696E1D700609AB0 /* TheoraVideoClip.h in Headers */,
-				D1CDFFDF1696E1D700609AB0 /* TheoraVideoFrame.h in Headers */,
-				D1CDFFE01696E1D700609AB0 /* TheoraVideoManager.h in Headers */,
-				D1CDFFE11696E1D700609AB0 /* TheoraWorkerThread.h in Headers */,
-				D1CDFFE21696E1D700609AB0 /* TheoraVideoClip_Theora.h in Headers */,
-				D1E271B816B471E80046C00C /* TheoraPixelTransform.h in Headers */,
-				D139463E17C0ED450091F4A4 /* yuv_libyuv.h in Headers */,
-				D13946DD17C119B40091F4A4 /* yuv_util.h in Headers */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-/* End PBXHeadersBuildPhase section */
-
-/* Begin PBXNativeTarget section */
-		D1473F29150CA69B00B20490 /* theoraplayer (Theora) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D1473F3F150CA69B00B20490 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora)" */;
-			buildPhases = (
-				D1473F25150CA69B00B20490 /* Sources */,
-				D1473F26150CA69B00B20490 /* Frameworks */,
-				D1473F27150CA69B00B20490 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (Theora)";
-			productName = theoraplayer;
-			productReference = D1473F2A150CA69B00B20490 /* theoraplayer.framework */;
-			productType = "com.apple.product-type.framework";
-		};
-		D198F950177A31FC002942E3 /* theoraplayer (Mac Theora) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D198F975177A31FC002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora)" */;
-			buildPhases = (
-				D198F951177A31FC002942E3 /* Sources */,
-				D198F963177A31FC002942E3 /* Frameworks */,
-				D198F964177A31FC002942E3 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (Mac Theora)";
-			productName = libtheoraplayer;
-			productReference = D198F97B177A31FC002942E3 /* libtheoraplayer.a */;
-			productType = "com.apple.product-type.library.static";
-		};
-		D198F97C177A31FE002942E3 /* theoraplayer (Mac AVFoundation) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D198F9A1177A31FE002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac AVFoundation)" */;
-			buildPhases = (
-				D198F97D177A31FE002942E3 /* Sources */,
-				D198F98F177A31FE002942E3 /* Frameworks */,
-				D198F990177A31FE002942E3 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (Mac AVFoundation)";
-			productName = libtheoraplayer;
-			productReference = D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */;
-			productType = "com.apple.product-type.library.static";
-		};
-		D198F9A8177A3200002942E3 /* theoraplayer (Mac Theora AVFoundation) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D198F9CE177A3200002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora AVFoundation)" */;
-			buildPhases = (
-				D198F9A9177A3200002942E3 /* Sources */,
-				D198F9BC177A3200002942E3 /* Frameworks */,
-				D198F9BD177A3200002942E3 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (Mac Theora AVFoundation)";
-			productName = libtheoraplayer;
-			productReference = D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */;
-			productType = "com.apple.product-type.library.static";
-		};
-		D1BB6FAD150E9E7100EF9400 /* theoraplayer (iOS Theora) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D1BB6FBC150E9E7100EF9400 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora)" */;
-			buildPhases = (
-				D1BB6FAA150E9E7100EF9400 /* Sources */,
-				D1BB6FAB150E9E7100EF9400 /* Frameworks */,
-				D1BB6FAC150E9E7100EF9400 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (iOS Theora)";
-			productName = libtheoraplayer;
-			productReference = D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */;
-			productType = "com.apple.product-type.library.static";
-		};
-		D1CDFF221696C77A00609AB0 /* theoraplayer (AVFoundation) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D1CDFF421696C77A00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (AVFoundation)" */;
-			buildPhases = (
-				D1CDFF231696C77A00609AB0 /* Sources */,
-				D1CDFF2F1696C77A00609AB0 /* Frameworks */,
-				D1CDFF331696C77A00609AB0 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (AVFoundation)";
-			productName = theoraplayer;
-			productReference = D1CDFF481696C77A00609AB0 /* theoraplayer.framework */;
-			productType = "com.apple.product-type.framework";
-		};
-		D1CDFF4A1696C79700609AB0 /* theoraplayer (Theora AVFoundation) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D1CDFF6A1696C79700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora AVFoundation)" */;
-			buildPhases = (
-				D1CDFF4B1696C79700609AB0 /* Sources */,
-				D1CDFF571696C79700609AB0 /* Frameworks */,
-				D1CDFF5B1696C79700609AB0 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (Theora AVFoundation)";
-			productName = theoraplayer;
-			productReference = D1CDFF701696C79700609AB0 /* theoraplayer.framework */;
-			productType = "com.apple.product-type.framework";
-		};
-		D1CDFFA01696E1CA00609AB0 /* theoraplayer (iOS AVFoundation) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D1CDFFBE1696E1CA00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS AVFoundation)" */;
-			buildPhases = (
-				D1CDFFA11696E1CA00609AB0 /* Sources */,
-				D1CDFFAE1696E1CA00609AB0 /* Frameworks */,
-				D1CDFFAF1696E1CA00609AB0 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (iOS AVFoundation)";
-			productName = libtheoraplayer;
-			productReference = D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */;
-			productType = "com.apple.product-type.library.static";
-		};
-		D1CDFFC51696E1D700609AB0 /* theoraplayer (iOS Theora AVFoundation) */ = {
-			isa = PBXNativeTarget;
-			buildConfigurationList = D1CDFFE31696E1D700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora AVFoundation)" */;
-			buildPhases = (
-				D1CDFFC61696E1D700609AB0 /* Sources */,
-				D1CDFFD31696E1D700609AB0 /* Frameworks */,
-				D1CDFFD41696E1D700609AB0 /* Headers */,
-			);
-			buildRules = (
-			);
-			dependencies = (
-			);
-			name = "theoraplayer (iOS Theora AVFoundation)";
-			productName = libtheoraplayer;
-			productReference = D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */;
-			productType = "com.apple.product-type.library.static";
-		};
-/* End PBXNativeTarget section */
-
-/* Begin PBXProject section */
-		D1473F20150CA69B00B20490 /* Project object */ = {
-			isa = PBXProject;
-			attributes = {
-				LastUpgradeCheck = 0510;
-			};
-			buildConfigurationList = D1473F23150CA69B00B20490 /* Build configuration list for PBXProject "theoraplayer" */;
-			compatibilityVersion = "Xcode 3.2";
-			developmentRegion = English;
-			hasScannedForEncodings = 0;
-			knownRegions = (
-				en,
-			);
-			mainGroup = D1473F1E150CA69B00B20490;
-			productRefGroup = D1473F2B150CA69B00B20490 /* Products */;
-			projectDirPath = "";
-			projectRoot = "";
-			targets = (
-				D1473F29150CA69B00B20490 /* theoraplayer (Theora) */,
-				D1CDFF221696C77A00609AB0 /* theoraplayer (AVFoundation) */,
-				D1CDFF4A1696C79700609AB0 /* theoraplayer (Theora AVFoundation) */,
-				D198F950177A31FC002942E3 /* theoraplayer (Mac Theora) */,
-				D198F97C177A31FE002942E3 /* theoraplayer (Mac AVFoundation) */,
-				D198F9A8177A3200002942E3 /* theoraplayer (Mac Theora AVFoundation) */,
-				D1BB6FAD150E9E7100EF9400 /* theoraplayer (iOS Theora) */,
-				D1CDFFA01696E1CA00609AB0 /* theoraplayer (iOS AVFoundation) */,
-				D1CDFFC51696E1D700609AB0 /* theoraplayer (iOS Theora AVFoundation) */,
-			);
-		};
-/* End PBXProject section */
-
-/* Begin PBXSourcesBuildPhase section */
-		D1473F25150CA69B00B20490 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D16775AB155C501D0050EC64 /* TheoraAsync.cpp in Sources */,
-				D16775AD155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */,
-				D16775AF155C501D0050EC64 /* TheoraDataSource.cpp in Sources */,
-				D16775B1155C501D0050EC64 /* TheoraException.cpp in Sources */,
-				D16775B3155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */,
-				D16775B5155C501D0050EC64 /* TheoraTimer.cpp in Sources */,
-				D16775B7155C501D0050EC64 /* TheoraUtil.cpp in Sources */,
-				D16775B9155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */,
-				D16775BB155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */,
-				D16775BD155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */,
-				D16775BF155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */,
-				D1CDFF961696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
-				D1E2719916B46F640046C00C /* yuv420_grey_c.c in Sources */,
-				D1E271A516B46F640046C00C /* yuv420_yuv_c.c in Sources */,
-				D1E271AC16B470210046C00C /* yuv420_rgb_c.c in Sources */,
-				D1BCE05A18F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D1D465DA16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
-				D139462D17C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D13946CC17C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07217C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08417C157CD00CA0FD2 /* compare_posix.cc in Sources */,
-				D1BCE06318F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D1C3D09617C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D09F17C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0C317C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0CC17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0D517C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0DE17C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0E717C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10217C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12617C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D12F17C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13817C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15317C157CD00CA0FD2 /* row_posix.cc in Sources */,
-				D1C3D17717C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D19B17C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB017C227F30030FAB6 /* convert_from.cc in Sources */,
-				D159BCB917C228310030FAB6 /* rotate_argb.cc in Sources */,
-				D159BCC217C2286D0030FAB6 /* scale.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F951177A31FC002942E3 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D198F952177A31FC002942E3 /* TheoraAsync.cpp in Sources */,
-				D198F953177A31FC002942E3 /* TheoraAudioInterface.cpp in Sources */,
-				D198F954177A31FC002942E3 /* TheoraDataSource.cpp in Sources */,
-				D198F955177A31FC002942E3 /* TheoraException.cpp in Sources */,
-				D198F956177A31FC002942E3 /* TheoraFrameQueue.cpp in Sources */,
-				D198F957177A31FC002942E3 /* TheoraTimer.cpp in Sources */,
-				D198F958177A31FC002942E3 /* TheoraUtil.cpp in Sources */,
-				D198F959177A31FC002942E3 /* TheoraVideoClip.cpp in Sources */,
-				D198F95A177A31FC002942E3 /* TheoraVideoFrame.cpp in Sources */,
-				D198F95B177A31FC002942E3 /* TheoraVideoManager.cpp in Sources */,
-				D198F95C177A31FC002942E3 /* TheoraWorkerThread.cpp in Sources */,
-				D198F95D177A31FC002942E3 /* TheoraVideoClip_Theora.cpp in Sources */,
-				D198F95F177A31FC002942E3 /* yuv420_grey_c.c in Sources */,
-				D198F960177A31FC002942E3 /* yuv420_yuv_c.c in Sources */,
-				D198F961177A31FC002942E3 /* yuv420_rgb_c.c in Sources */,
-				D1BCE05D18F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D198F962177A31FC002942E3 /* TheoraAudioPacketQueue.cpp in Sources */,
-				D139463017C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D13946CF17C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07517C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08717C157CD00CA0FD2 /* compare_posix.cc in Sources */,
-				D1BCE06618F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D1C3D09917C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A217C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0C617C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0CF17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0D817C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0E117C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0EA17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10517C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12917C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13217C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13B17C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15617C157CD00CA0FD2 /* row_posix.cc in Sources */,
-				D1C3D17A17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D19E17C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB317C227F40030FAB6 /* convert_from.cc in Sources */,
-				D159BCBC17C228330030FAB6 /* rotate_argb.cc in Sources */,
-				D159BCC517C2286E0030FAB6 /* scale.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F97D177A31FE002942E3 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D198F97E177A31FE002942E3 /* TheoraAsync.cpp in Sources */,
-				D198F97F177A31FE002942E3 /* TheoraAudioInterface.cpp in Sources */,
-				D198F980177A31FE002942E3 /* TheoraDataSource.cpp in Sources */,
-				D198F981177A31FE002942E3 /* TheoraException.cpp in Sources */,
-				D198F982177A31FE002942E3 /* TheoraFrameQueue.cpp in Sources */,
-				D198F983177A31FE002942E3 /* TheoraTimer.cpp in Sources */,
-				D198F984177A31FE002942E3 /* TheoraUtil.cpp in Sources */,
-				D198F985177A31FE002942E3 /* TheoraVideoClip.cpp in Sources */,
-				D198F986177A31FE002942E3 /* TheoraVideoFrame.cpp in Sources */,
-				D198F987177A31FE002942E3 /* TheoraVideoManager.cpp in Sources */,
-				D198F988177A31FE002942E3 /* TheoraWorkerThread.cpp in Sources */,
-				D198F989177A31FE002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */,
-				D198F98B177A31FE002942E3 /* yuv420_grey_c.c in Sources */,
-				D198F98C177A31FE002942E3 /* yuv420_yuv_c.c in Sources */,
-				D198F98D177A31FE002942E3 /* yuv420_rgb_c.c in Sources */,
-				D1BCE05E18F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D198F98E177A31FE002942E3 /* TheoraAudioPacketQueue.cpp in Sources */,
-				D139463117C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D13946D017C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07617C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08817C157CD00CA0FD2 /* compare_posix.cc in Sources */,
-				D1BCE06718F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D1C3D09A17C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A317C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0C717C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0D017C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0D917C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0E217C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0EB17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10617C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12A17C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13317C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13C17C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15717C157CD00CA0FD2 /* row_posix.cc in Sources */,
-				D1C3D17B17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D19F17C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB417C227F50030FAB6 /* convert_from.cc in Sources */,
-				D159BCBD17C228330030FAB6 /* rotate_argb.cc in Sources */,
-				D159BCC617C2286E0030FAB6 /* scale.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D198F9A9177A3200002942E3 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D198F9AA177A3200002942E3 /* TheoraAsync.cpp in Sources */,
-				D198F9AB177A3200002942E3 /* TheoraAudioInterface.cpp in Sources */,
-				D198F9AC177A3200002942E3 /* TheoraDataSource.cpp in Sources */,
-				D198F9AD177A3200002942E3 /* TheoraException.cpp in Sources */,
-				D198F9AE177A3200002942E3 /* TheoraFrameQueue.cpp in Sources */,
-				D198F9AF177A3200002942E3 /* TheoraTimer.cpp in Sources */,
-				D198F9B0177A3200002942E3 /* TheoraUtil.cpp in Sources */,
-				D198F9B1177A3200002942E3 /* TheoraVideoClip.cpp in Sources */,
-				D198F9B2177A3200002942E3 /* TheoraVideoFrame.cpp in Sources */,
-				D198F9B3177A3200002942E3 /* TheoraVideoManager.cpp in Sources */,
-				D198F9B4177A3200002942E3 /* TheoraWorkerThread.cpp in Sources */,
-				D198F9B5177A3200002942E3 /* TheoraVideoClip_Theora.cpp in Sources */,
-				D1BCE06818F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D198F9B6177A3200002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */,
-				D198F9B8177A3200002942E3 /* yuv420_grey_c.c in Sources */,
-				D198F9B9177A3200002942E3 /* yuv420_yuv_c.c in Sources */,
-				D198F9BA177A3200002942E3 /* yuv420_rgb_c.c in Sources */,
-				D198F9BB177A3200002942E3 /* TheoraAudioPacketQueue.cpp in Sources */,
-				D139463217C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D13946D117C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07717C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08917C157CD00CA0FD2 /* compare_posix.cc in Sources */,
-				D1C3D09B17C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A417C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0C817C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0D117C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0DA17C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0E317C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0EC17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10717C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12B17C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13417C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13D17C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15817C157CD00CA0FD2 /* row_posix.cc in Sources */,
-				D1C3D17C17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D1A017C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB517C227F50030FAB6 /* convert_from.cc in Sources */,
-				D159BCBE17C228340030FAB6 /* rotate_argb.cc in Sources */,
-				D1BCE05F18F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D159BCC717C2286F0030FAB6 /* scale.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1BB6FAA150E9E7100EF9400 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D16775AC155C501D0050EC64 /* TheoraAsync.cpp in Sources */,
-				D1BCE06018F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D16775AE155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */,
-				D16775B0155C501D0050EC64 /* TheoraDataSource.cpp in Sources */,
-				D16775B2155C501D0050EC64 /* TheoraException.cpp in Sources */,
-				D16775B4155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */,
-				D16775B6155C501D0050EC64 /* TheoraTimer.cpp in Sources */,
-				D16775B8155C501D0050EC64 /* TheoraUtil.cpp in Sources */,
-				D16775BA155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */,
-				D16775BC155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */,
-				D16775BE155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */,
-				D16775C0155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */,
-				D1BCE06918F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D1CDFF981696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
-				D1E2719C16B46F640046C00C /* yuv420_grey_c.c in Sources */,
-				D1E271A816B46F640046C00C /* yuv420_yuv_c.c in Sources */,
-				D1E271AF16B470210046C00C /* yuv420_rgb_c.c in Sources */,
-				D139463317C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D1D465DD16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
-				D13946D217C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07817C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08117C157CD00CA0FD2 /* compare_neon.cc in Sources */,
-				D1C3D09C17C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A517C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0C917C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0D217C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0DB17C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0E417C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0ED17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10817C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12317C157CD00CA0FD2 /* rotate_neon.cc in Sources */,
-				D1C3D12C17C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13517C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13E17C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15017C157CD00CA0FD2 /* row_neon.cc in Sources */,
-				D1C3D17417C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */,
-				D1C3D17D17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D18F17C157CD00CA0FD2 /* scale_neon.cc in Sources */,
-				D1C3D1A117C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB617C227F60030FAB6 /* convert_from.cc in Sources */,
-				D159BCBF17C228340030FAB6 /* rotate_argb.cc in Sources */,
-				D159BCC817C2286F0030FAB6 /* scale.cc in Sources */,
-				D15D361017C386A600F40439 /* row_posix.cc in Sources */,
-				D15D361317C386B100F40439 /* compare_posix.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFF231696C77A00609AB0 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFF241696C77A00609AB0 /* TheoraAsync.cpp in Sources */,
-				D1CDFF251696C77A00609AB0 /* TheoraAudioInterface.cpp in Sources */,
-				D1CDFF261696C77A00609AB0 /* TheoraDataSource.cpp in Sources */,
-				D1CDFF271696C77A00609AB0 /* TheoraException.cpp in Sources */,
-				D1CDFF281696C77A00609AB0 /* TheoraFrameQueue.cpp in Sources */,
-				D1CDFF291696C77A00609AB0 /* TheoraTimer.cpp in Sources */,
-				D1CDFF2A1696C77A00609AB0 /* TheoraUtil.cpp in Sources */,
-				D1CDFF2B1696C77A00609AB0 /* TheoraVideoClip.cpp in Sources */,
-				D1CDFF2C1696C77A00609AB0 /* TheoraVideoFrame.cpp in Sources */,
-				D1CDFF2D1696C77A00609AB0 /* TheoraVideoManager.cpp in Sources */,
-				D1CDFF2E1696C77A00609AB0 /* TheoraWorkerThread.cpp in Sources */,
-				D1CDFF9E1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
-				D1E2719A16B46F640046C00C /* yuv420_grey_c.c in Sources */,
-				D1E271A616B46F640046C00C /* yuv420_yuv_c.c in Sources */,
-				D1E271AD16B470210046C00C /* yuv420_rgb_c.c in Sources */,
-				D1BCE05B18F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D1D465DB16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
-				D139462E17C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D13946CD17C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07317C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08517C157CD00CA0FD2 /* compare_posix.cc in Sources */,
-				D1BCE06418F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D1C3D09717C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A017C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0C417C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0CD17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0D617C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0DF17C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0E817C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10317C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12717C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13017C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13917C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15417C157CD00CA0FD2 /* row_posix.cc in Sources */,
-				D1C3D17817C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D19C17C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB117C227F40030FAB6 /* convert_from.cc in Sources */,
-				D159BCBA17C228320030FAB6 /* rotate_argb.cc in Sources */,
-				D159BCC317C2286D0030FAB6 /* scale.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFF4B1696C79700609AB0 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFF4C1696C79700609AB0 /* TheoraAsync.cpp in Sources */,
-				D1CDFF4D1696C79700609AB0 /* TheoraAudioInterface.cpp in Sources */,
-				D1CDFF4E1696C79700609AB0 /* TheoraDataSource.cpp in Sources */,
-				D1CDFF4F1696C79700609AB0 /* TheoraException.cpp in Sources */,
-				D1CDFF501696C79700609AB0 /* TheoraFrameQueue.cpp in Sources */,
-				D1CDFF511696C79700609AB0 /* TheoraTimer.cpp in Sources */,
-				D1CDFF521696C79700609AB0 /* TheoraUtil.cpp in Sources */,
-				D1CDFF531696C79700609AB0 /* TheoraVideoClip.cpp in Sources */,
-				D1CDFF541696C79700609AB0 /* TheoraVideoFrame.cpp in Sources */,
-				D1CDFF551696C79700609AB0 /* TheoraVideoManager.cpp in Sources */,
-				D1CDFF561696C79700609AB0 /* TheoraWorkerThread.cpp in Sources */,
-				D1CDFF971696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
-				D1BCE06518F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D1CDFFEC1696E24F00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
-				D1E2719B16B46F640046C00C /* yuv420_grey_c.c in Sources */,
-				D1E271A716B46F640046C00C /* yuv420_yuv_c.c in Sources */,
-				D1E271AE16B470210046C00C /* yuv420_rgb_c.c in Sources */,
-				D1D465DC16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
-				D139462F17C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D13946CE17C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07417C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08617C157CD00CA0FD2 /* compare_posix.cc in Sources */,
-				D1C3D09817C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A117C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0C517C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0CE17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0D717C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0E017C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0E917C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10417C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12817C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13117C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13A17C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15517C157CD00CA0FD2 /* row_posix.cc in Sources */,
-				D1C3D17917C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D19D17C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB217C227F40030FAB6 /* convert_from.cc in Sources */,
-				D159BCBB17C228320030FAB6 /* rotate_argb.cc in Sources */,
-				D1BCE05C18F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D159BCC417C2286D0030FAB6 /* scale.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFFA11696E1CA00609AB0 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFFA21696E1CA00609AB0 /* TheoraAsync.cpp in Sources */,
-				D1BCE06118F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D1CDFFA31696E1CA00609AB0 /* TheoraAudioInterface.cpp in Sources */,
-				D1CDFFA41696E1CA00609AB0 /* TheoraDataSource.cpp in Sources */,
-				D1CDFFA51696E1CA00609AB0 /* TheoraException.cpp in Sources */,
-				D1CDFFA61696E1CA00609AB0 /* TheoraFrameQueue.cpp in Sources */,
-				D1CDFFA71696E1CA00609AB0 /* TheoraTimer.cpp in Sources */,
-				D1CDFFA81696E1CA00609AB0 /* TheoraUtil.cpp in Sources */,
-				D1CDFFA91696E1CA00609AB0 /* TheoraVideoClip.cpp in Sources */,
-				D1CDFFAA1696E1CA00609AB0 /* TheoraVideoFrame.cpp in Sources */,
-				D1CDFFAB1696E1CA00609AB0 /* TheoraVideoManager.cpp in Sources */,
-				D1CDFFAC1696E1CA00609AB0 /* TheoraWorkerThread.cpp in Sources */,
-				D1BCE06A18F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D1CDFFEA1696E24B00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
-				D1E2719D16B46F640046C00C /* yuv420_grey_c.c in Sources */,
-				D1E271A916B46F640046C00C /* yuv420_yuv_c.c in Sources */,
-				D1E271B016B470210046C00C /* yuv420_rgb_c.c in Sources */,
-				D139463417C0ED450091F4A4 /* yuv_libyuv.c in Sources */,
-				D1D465DE16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
-				D13946D317C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07917C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08217C157CD00CA0FD2 /* compare_neon.cc in Sources */,
-				D1C3D09D17C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A617C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0CA17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0D317C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0DC17C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0E517C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0EE17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10917C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12417C157CD00CA0FD2 /* rotate_neon.cc in Sources */,
-				D1C3D12D17C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13617C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D13F17C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15117C157CD00CA0FD2 /* row_neon.cc in Sources */,
-				D1C3D17517C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */,
-				D1C3D17E17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D19017C157CD00CA0FD2 /* scale_neon.cc in Sources */,
-				D1C3D1A217C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB717C227F60030FAB6 /* convert_from.cc in Sources */,
-				D159BCC017C228340030FAB6 /* rotate_argb.cc in Sources */,
-				D159BCC917C2286F0030FAB6 /* scale.cc in Sources */,
-				D15D361117C386A600F40439 /* row_posix.cc in Sources */,
-				D15D361617C386B400F40439 /* compare_posix.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-		D1CDFFC61696E1D700609AB0 /* Sources */ = {
-			isa = PBXSourcesBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-				D1CDFFC71696E1D700609AB0 /* TheoraAsync.cpp in Sources */,
-				D1CDFFC81696E1D700609AB0 /* TheoraAudioInterface.cpp in Sources */,
-				D1CDFFC91696E1D700609AB0 /* TheoraDataSource.cpp in Sources */,
-				D1CDFFCA1696E1D700609AB0 /* TheoraException.cpp in Sources */,
-				D1CDFFCB1696E1D700609AB0 /* TheoraFrameQueue.cpp in Sources */,
-				D1CDFFCC1696E1D700609AB0 /* TheoraTimer.cpp in Sources */,
-				D1CDFFCD1696E1D700609AB0 /* TheoraUtil.cpp in Sources */,
-				D1CDFFCE1696E1D700609AB0 /* TheoraVideoClip.cpp in Sources */,
-				D1CDFFCF1696E1D700609AB0 /* TheoraVideoFrame.cpp in Sources */,
-				D1CDFFD01696E1D700609AB0 /* TheoraVideoManager.cpp in Sources */,
-				D1CDFFD11696E1D700609AB0 /* TheoraWorkerThread.cpp in Sources */,
-				D1CDFFD21696E1D700609AB0 /* TheoraVideoClip_Theora.cpp in Sources */,
-				D1CDFFEB1696E24C00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */,
-				D1E2719E16B46F640046C00C /* yuv420_grey_c.c in Sources */,
-				D1E271AA16B46F640046C00C /* yuv420_yuv_c.c in Sources */,
-				D1E271B116B470210046C00C /* yuv420_rgb_c.c in Sources */,
-				D13946C617C110670091F4A4 /* yuv_libyuv.c in Sources */,
-				D1D465DF16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */,
-				D13946D417C119B40091F4A4 /* yuv_util.c in Sources */,
-				D1C3D07A17C157CD00CA0FD2 /* compare_common.cc in Sources */,
-				D1C3D08317C157CD00CA0FD2 /* compare_neon.cc in Sources */,
-				D1C3D09E17C157CD00CA0FD2 /* compare.cc in Sources */,
-				D1C3D0A717C157CD00CA0FD2 /* convert_argb.cc in Sources */,
-				D1C3D0CB17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */,
-				D1C3D0D417C157CD00CA0FD2 /* convert_to_i420.cc in Sources */,
-				D1C3D0DD17C157CD00CA0FD2 /* convert.cc in Sources */,
-				D1C3D0E617C157CD00CA0FD2 /* cpu_id.cc in Sources */,
-				D1C3D0EF17C157CD00CA0FD2 /* format_conversion.cc in Sources */,
-				D1C3D10A17C157CD00CA0FD2 /* planar_functions.cc in Sources */,
-				D1C3D12517C157CD00CA0FD2 /* rotate_neon.cc in Sources */,
-				D1C3D12E17C157CD00CA0FD2 /* rotate.cc in Sources */,
-				D1C3D13717C157CD00CA0FD2 /* row_any.cc in Sources */,
-				D1C3D14017C157CD00CA0FD2 /* row_common.cc in Sources */,
-				D1C3D15217C157CD00CA0FD2 /* row_neon.cc in Sources */,
-				D1C3D17617C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */,
-				D1C3D17F17C157CD00CA0FD2 /* scale_argb.cc in Sources */,
-				D1C3D19117C157CD00CA0FD2 /* scale_neon.cc in Sources */,
-				D1C3D1A317C157CD00CA0FD2 /* video_common.cc in Sources */,
-				D159BCB817C227F70030FAB6 /* convert_from.cc in Sources */,
-				D1BCE06218F3F7FE00C83470 /* scale_common.cc in Sources */,
-				D159BCC117C228350030FAB6 /* rotate_argb.cc in Sources */,
-				D159BCCA17C228700030FAB6 /* scale.cc in Sources */,
-				D1BCE06B18F3F7FE00C83470 /* scale_posix.cc in Sources */,
-				D15D361217C386A700F40439 /* row_posix.cc in Sources */,
-				D15D361517C386B300F40439 /* compare_posix.cc in Sources */,
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-		};
-/* End PBXSourcesBuildPhase section */
-
-/* Begin XCBuildConfiguration section */
-		D1473F43150CA6CE00B20490 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			buildSettings = {
-				ALWAYS_SEARCH_USER_PATHS = NO;
-				CLANG_WARN_BOOL_CONVERSION = YES;
-				CLANG_WARN_CONSTANT_CONVERSION = YES;
-				CLANG_WARN_EMPTY_BODY = YES;
-				CLANG_WARN_ENUM_CONVERSION = YES;
-				CLANG_WARN_INT_CONVERSION = YES;
-				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
-				COPY_PHASE_STRIP = NO;
-				GCC_DYNAMIC_NO_PIC = NO;
-				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
-				GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
-				GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
-				GCC_OPTIMIZATION_LEVEL = 0;
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					"DEBUG=1",
-					_DEBUG,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = (
-					"$(LIBYUV_PREPROCESSOR_IOS)",
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)";
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = (
-					"$(LIBYUV_PREPROCESSOR_MAC)",
-					"$(inherited)",
-				);
-				GCC_SYMBOLS_PRIVATE_EXTERN = YES;
-				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
-				GCC_WARN_ABOUT_RETURN_TYPE = YES;
-				GCC_WARN_UNDECLARED_SELECTOR = YES;
-				GCC_WARN_UNINITIALIZED_AUTOS = YES;
-				GCC_WARN_UNUSED_FUNCTION = YES;
-				GCC_WARN_UNUSED_VARIABLE = YES;
-				HEADER_SEARCH_PATHS = (
-					"$(SRCROOT)/../ogg/include",
-					"$(SRCROOT)/../vorbis/include",
-					"$(SRCROOT)/../xal/lib/ogg/include",
-					"$(SRCROOT)/../xal/lib/vorbis/include",
-					"$(SRCROOT)/../theora/include",
-					"$(SRCROOT)/src/YUV/libyuv/include",
-				);
-				LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__";
-				LIBYUV_PREPROCESSOR_MAC = __SSSE3__;
-				ONLY_ACTIVE_ARCH = YES;
-				SKIP_INSTALL = YES;
-			};
-			name = Debug;
-		};
-		D1473F44150CA6CE00B20490 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = Debug;
-		};
-		D1473F45150CA6D600B20490 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			buildSettings = {
-				ALWAYS_SEARCH_USER_PATHS = NO;
-				CLANG_WARN_BOOL_CONVERSION = YES;
-				CLANG_WARN_CONSTANT_CONVERSION = YES;
-				CLANG_WARN_EMPTY_BODY = YES;
-				CLANG_WARN_ENUM_CONVERSION = YES;
-				CLANG_WARN_INT_CONVERSION = YES;
-				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
-				COPY_PHASE_STRIP = YES;
-				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
-				GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
-				GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
-				GCC_OPTIMIZATION_LEVEL = 3;
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = (
-					"$(LIBYUV_PREPROCESSOR_IOS)",
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)";
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = (
-					"$(LIBYUV_PREPROCESSOR_MAC)",
-					"$(inherited)",
-				);
-				GCC_SYMBOLS_PRIVATE_EXTERN = YES;
-				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
-				GCC_WARN_ABOUT_RETURN_TYPE = YES;
-				GCC_WARN_UNDECLARED_SELECTOR = YES;
-				GCC_WARN_UNINITIALIZED_AUTOS = YES;
-				GCC_WARN_UNUSED_FUNCTION = YES;
-				GCC_WARN_UNUSED_VARIABLE = YES;
-				HEADER_SEARCH_PATHS = (
-					"$(SRCROOT)/../ogg/include",
-					"$(SRCROOT)/../vorbis/include",
-					"$(SRCROOT)/../xal/lib/ogg/include",
-					"$(SRCROOT)/../xal/lib/vorbis/include",
-					"$(SRCROOT)/../theora/include",
-					"$(SRCROOT)/src/YUV/libyuv/include",
-				);
-				LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__";
-				LIBYUV_PREPROCESSOR_MAC = __SSSE3__;
-				SKIP_INSTALL = YES;
-			};
-			name = "App Store";
-		};
-		D1473F46150CA6D600B20490 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = "App Store";
-		};
-		D1473F47150CA6E200B20490 /* Release */ = {
-			isa = XCBuildConfiguration;
-			buildSettings = {
-				ALWAYS_SEARCH_USER_PATHS = NO;
-				CLANG_WARN_BOOL_CONVERSION = YES;
-				CLANG_WARN_CONSTANT_CONVERSION = YES;
-				CLANG_WARN_EMPTY_BODY = YES;
-				CLANG_WARN_ENUM_CONVERSION = YES;
-				CLANG_WARN_INT_CONVERSION = YES;
-				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
-				COPY_PHASE_STRIP = YES;
-				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
-				GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
-				GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
-				GCC_OPTIMIZATION_LEVEL = 3;
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = (
-					"$(LIBYUV_PREPROCESSOR_IOS)",
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)";
-				"GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = (
-					"$(LIBYUV_PREPROCESSOR_MAC)",
-					"$(inherited)",
-				);
-				GCC_SYMBOLS_PRIVATE_EXTERN = YES;
-				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
-				GCC_WARN_ABOUT_RETURN_TYPE = YES;
-				GCC_WARN_UNDECLARED_SELECTOR = YES;
-				GCC_WARN_UNINITIALIZED_AUTOS = YES;
-				GCC_WARN_UNUSED_FUNCTION = YES;
-				GCC_WARN_UNUSED_VARIABLE = YES;
-				HEADER_SEARCH_PATHS = (
-					"$(SRCROOT)/../ogg/include",
-					"$(SRCROOT)/../vorbis/include",
-					"$(SRCROOT)/../xal/lib/ogg/include",
-					"$(SRCROOT)/../xal/lib/vorbis/include",
-					"$(SRCROOT)/../theora/include",
-					"$(SRCROOT)/src/YUV/libyuv/include",
-				);
-				LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__";
-				LIBYUV_PREPROCESSOR_MAC = __SSSE3__;
-				SKIP_INSTALL = YES;
-			};
-			name = Release;
-		};
-		D1473F48150CA6E200B20490 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = Release;
-		};
-		D198F977177A31FC002942E3 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Debug;
-		};
-		D198F979177A31FC002942E3 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Release;
-		};
-		D198F97A177A31FC002942E3 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = "App Store";
-		};
-		D198F9A3177A31FE002942E3 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer_avfoundation;
-				SKIP_INSTALL = YES;
-			};
-			name = Debug;
-		};
-		D198F9A5177A31FE002942E3 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer_avfoundation;
-				SKIP_INSTALL = YES;
-			};
-			name = Release;
-		};
-		D198F9A6177A31FE002942E3 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer_avfoundation;
-				SKIP_INSTALL = YES;
-			};
-			name = "App Store";
-		};
-		D198F9D0177A3200002942E3 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__THEORA,
-					__AVFOUNDATION,
-					YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer_theora_avfoundation;
-				SKIP_INSTALL = YES;
-			};
-			name = Debug;
-		};
-		D198F9D2177A3200002942E3 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__THEORA,
-					__AVFOUNDATION,
-					YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer_theora_avfoundation;
-				SKIP_INSTALL = YES;
-			};
-			name = Release;
-		};
-		D198F9D3177A3200002942E3 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_MAC,
-					__THEORA,
-					__AVFOUNDATION,
-					YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer_theora_avfoundation;
-				SKIP_INSTALL = YES;
-			};
-			name = "App Store";
-		};
-		D1BB6FB8150E9E7100EF9400 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Debug;
-		};
-		D1BB6FBA150E9E7100EF9400 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Release;
-		};
-		D1BB6FBB150E9E7100EF9400 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__THEORA,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = "App Store";
-		};
-		D1CDFF441696C77A00609AB0 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = Debug;
-		};
-		D1CDFF461696C77A00609AB0 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = Release;
-		};
-		D1CDFF471696C77A00609AB0 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = "App Store";
-		};
-		D1CDFF6C1696C79700609AB0 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__THEORA,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = Debug;
-		};
-		D1CDFF6E1696C79700609AB0 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__THEORA,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = Release;
-		};
-		D1CDFF6F1696C79700609AB0 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					__THEORA,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				INFOPLIST_FILE = Info.plist;
-				LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)";
-				PRODUCT_NAME = theoraplayer;
-				WRAPPER_EXTENSION = framework;
-			};
-			name = "App Store";
-		};
-		D1CDFFC01696E1CA00609AB0 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Debug;
-		};
-		D1CDFFC21696E1CA00609AB0 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Release;
-		};
-		D1CDFFC31696E1CA00609AB0 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = "App Store";
-		};
-		D1CDFFE51696E1D700609AB0 /* Debug */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__THEORA,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Debug;
-		};
-		D1CDFFE71696E1D700609AB0 /* Release */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__THEORA,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = Release;
-		};
-		D1CDFFE81696E1D700609AB0 /* App Store */ = {
-			isa = XCBuildConfiguration;
-			baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */;
-			buildSettings = {
-				GCC_PREPROCESSOR_DEFINITIONS = (
-					_IOS,
-					__THEORA,
-					__AVFOUNDATION,
-					_YUV_LIBYUV,
-					"$(inherited)",
-				);
-				"GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON;
-				GCC_WARN_UNINITIALIZED_AUTOS = NO;
-				GCC_WARN_UNUSED_VALUE = NO;
-				GCC_WARN_UNUSED_VARIABLE = NO;
-				PRODUCT_NAME = theoraplayer;
-				SKIP_INSTALL = YES;
-			};
-			name = "App Store";
-		};
-/* End XCBuildConfiguration section */
-
-/* Begin XCConfigurationList section */
-		D1473F23150CA69B00B20490 /* Build configuration list for PBXProject "theoraplayer" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D1473F43150CA6CE00B20490 /* Debug */,
-				D1473F47150CA6E200B20490 /* Release */,
-				D1473F45150CA6D600B20490 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D1473F3F150CA69B00B20490 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D1473F44150CA6CE00B20490 /* Debug */,
-				D1473F48150CA6E200B20490 /* Release */,
-				D1473F46150CA6D600B20490 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D198F975177A31FC002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D198F977177A31FC002942E3 /* Debug */,
-				D198F979177A31FC002942E3 /* Release */,
-				D198F97A177A31FC002942E3 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D198F9A1177A31FE002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac AVFoundation)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D198F9A3177A31FE002942E3 /* Debug */,
-				D198F9A5177A31FE002942E3 /* Release */,
-				D198F9A6177A31FE002942E3 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D198F9CE177A3200002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora AVFoundation)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D198F9D0177A3200002942E3 /* Debug */,
-				D198F9D2177A3200002942E3 /* Release */,
-				D198F9D3177A3200002942E3 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D1BB6FBC150E9E7100EF9400 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D1BB6FB8150E9E7100EF9400 /* Debug */,
-				D1BB6FBA150E9E7100EF9400 /* Release */,
-				D1BB6FBB150E9E7100EF9400 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D1CDFF421696C77A00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (AVFoundation)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D1CDFF441696C77A00609AB0 /* Debug */,
-				D1CDFF461696C77A00609AB0 /* Release */,
-				D1CDFF471696C77A00609AB0 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D1CDFF6A1696C79700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora AVFoundation)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D1CDFF6C1696C79700609AB0 /* Debug */,
-				D1CDFF6E1696C79700609AB0 /* Release */,
-				D1CDFF6F1696C79700609AB0 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D1CDFFBE1696E1CA00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS AVFoundation)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D1CDFFC01696E1CA00609AB0 /* Debug */,
-				D1CDFFC21696E1CA00609AB0 /* Release */,
-				D1CDFFC31696E1CA00609AB0 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-		D1CDFFE31696E1D700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora AVFoundation)" */ = {
-			isa = XCConfigurationList;
-			buildConfigurations = (
-				D1CDFFE51696E1D700609AB0 /* Debug */,
-				D1CDFFE71696E1D700609AB0 /* Release */,
-				D1CDFFE81696E1D700609AB0 /* App Store */,
-			);
-			defaultConfigurationIsVisible = 0;
-			defaultConfigurationName = Debug;
-		};
-/* End XCConfigurationList section */
-	};
-	rootObject = D1473F20150CA69B00B20490 /* Project object */;
-}
diff --git a/drivers/theoraplayer/video_stream_theoraplayer.cpp b/drivers/theoraplayer/video_stream_theoraplayer.cpp
deleted file mode 100644
index 876cac3425..0000000000
--- a/drivers/theoraplayer/video_stream_theoraplayer.cpp
+++ /dev/null
@@ -1,556 +0,0 @@
-/*************************************************************************/
-/*  video_stream.cpp                                                     */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                    http://www.godotengine.org                         */
-/*************************************************************************/
-/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-#include "video_stream_theoraplayer.h"
-
-#include "core/os/file_access.h"
-
-#include "include/theoraplayer/TheoraPlayer.h"
-#include "include/theoraplayer/TheoraTimer.h"
-#include "include/theoraplayer/TheoraAudioInterface.h"
-#include "include/theoraplayer/TheoraDataSource.h"
-#include "include/theoraplayer/TheoraException.h"
-
-#include "core/ring_buffer.h"
-#include "core/os/thread_safe.h"
-
-#include "core/globals.h"
-
-static TheoraVideoManager* mgr = NULL;
-
-class TPDataFA : public TheoraDataSource {
-
-	FileAccess* fa;
-	String data_name;
-
-public:
-
-	int read(void* output,int nBytes) {
-
-		if (!fa)
-			return -1;
-
-		return fa->get_buffer((uint8_t*)output, nBytes);
-	};
-
-	//! returns a string representation of the DataSource, eg 'File: source.ogg'
-	virtual std::string repr() {
-		return data_name.utf8().get_data();
-	};
-
-	//! position the source pointer to byte_index from the start of the source
-	virtual void seek(unsigned long byte_index) {
-
-		if (!fa)
-			return;
-
-		fa->seek(byte_index);
-	};
-
-
-	//! return the size of the stream in bytes
-	virtual unsigned long size() {
-
-		if (!fa)
-			return 0;
-
-		return fa->get_len();
-	};
-
-	//! return the current position of the source pointer
-	virtual unsigned long tell() {
-
-		if (!fa)
-			return 0;
-
-		return fa->get_pos();
-	};
-
-	TPDataFA(const String& p_path) {
-
-		fa = FileAccess::open(p_path, FileAccess::READ);
-		data_name = "File: " + p_path;
-	};
-
-	TPDataFA(FileAccess* p_fa, const String& p_path) {
-
-		fa = p_fa;
-		data_name = "File: " + p_path;
-	};
-
-	~TPDataFA() {
-
-		if (fa)
-			memdelete(fa);
-	};
-};
-
-class AudioStreamInput : public AudioStreamResampled {
-
-	_THREAD_SAFE_CLASS_;
-
-	int channels;
-	int freq;
-
-	RID stream_rid;
-	mutable RingBuffer<float> rb;
-	int rb_power;
-	int total_wrote;
-	bool playing;
-	bool paused;
-
-public:
-
-	virtual void play() {
-
-		_THREAD_SAFE_METHOD_
-		_setup(channels, freq, 256);
-		stream_rid=AudioServer::get_singleton()->audio_stream_create(get_audio_stream());
-		AudioServer::get_singleton()->stream_set_active(stream_rid,true);
-		AudioServer::get_singleton()->stream_set_volume_scale(stream_rid,1);
-		playing = true;
-		paused = false;
-	};
-	virtual void stop() {
-
-		_THREAD_SAFE_METHOD_
-
-		AudioServer::get_singleton()->stream_set_active(stream_rid,false);
-		//_clear_stream();
-		playing=false;
-		_clear();
-	};
-
-	virtual bool is_playing() const { return true; };
-
-	virtual void set_paused(bool p_paused) { paused = p_paused; };
-	virtual bool is_paused(bool p_paused) const { return paused; };
-
-	virtual void set_loop(bool p_enable) {};
-	virtual bool has_loop() const { return false; };
-
-	virtual float get_length() const { return 0; };
-
-	virtual String get_stream_name() const { return "Theora Audio Stream"; };
-
-	virtual int get_loop_count() const { return 1; };
-
-	virtual float get_pos() const { return 0; };
-	virtual void seek_pos(float p_time) {};
-
-	virtual UpdateMode get_update_mode() const { return UPDATE_THREAD; };
-
-	virtual bool _can_mix() const { return true; };
-
-	void input(float* p_data, int p_samples) {
-
-
-		_THREAD_SAFE_METHOD_;
-		//printf("input %i samples from %p\n", p_samples, p_data);
-		if (rb.space_left() < p_samples) {
-			rb_power += 1;
-			rb.resize(rb_power);
-		}
-		rb.write(p_data, p_samples);
-
-		update(); //update too here for less latency
-	};
-
-	void update() {
-
-		_THREAD_SAFE_METHOD_;
-		int todo = get_todo();
-		int16_t* buffer = get_write_buffer();
-		int frames = rb.data_left()/channels;
-		const int to_write = MIN(todo, frames);
-
-		for (int i=0; i<to_write*channels; i++) {
-
-			int v = rb.read() * 32767;
-			int16_t sample = CLAMP(v,-32768,32767);
-			buffer[i] = sample;
-		};
-		write(to_write);
-		total_wrote += to_write;
-	};
-
-	int get_pending() const {
-		return rb.data_left();
-	};
-
-	int get_total_wrote() {
-
-		return total_wrote - (get_total() - get_todo());
-	};
-
-	AudioStreamInput(int p_channels, int p_freq) {
-
-		playing = false;
-		paused = true;
-		channels = p_channels;
-		freq = p_freq;
-		total_wrote = 0;
-		rb_power = 22;
-		rb.resize(rb_power);
-	};
-
-	~AudioStreamInput() {
-
-		stop();
-	};
-};
-
-class TPAudioGodot : public TheoraAudioInterface, TheoraTimer {
-
-	Ref<AudioStreamInput> stream;
-	int sample_count;
-	int channels;
-	int freq;
-
-public:
-
-	void insertData(float* data, int nSamples) {
-
-		stream->input(data, nSamples);
-	};
-
-	TPAudioGodot(TheoraVideoClip* owner, int nChannels, int p_freq)
-		: TheoraAudioInterface(owner, nChannels, p_freq), TheoraTimer() {
-
-		printf("***************** audio interface constructor freq %i\n", p_freq);
-		channels = nChannels;
-		freq = p_freq;
-		stream = Ref<AudioStreamInput>(memnew(AudioStreamInput(nChannels, p_freq)));
-		stream->play();
-		sample_count = 0;
-		owner->setTimer(this);
-	};
-
-	void stop() {
-
-		stream->stop();
-	};
-
-	void update(float time_increase)
-	{
-		float prev_time = mTime;
-		//mTime = (float)(stream->get_total_wrote()) / freq;
-		//mTime = MAX(0,mTime-AudioServer::get_singleton()->get_output_delay());
-		//mTime = (float)sample_count / channels / freq;
-		mTime += time_increase;
-		if (mTime - prev_time > .02) printf("time increase %f secs\n", mTime - prev_time);
-		//float duration=mClip->getDuration();
-		//if (mTime > duration) mTime=duration;
-		//printf("time at timer is %f, %f, samples %i\n", mTime, time_increase, sample_count);
-	}
-};
-
-class TPAudioGodotFactory : public TheoraAudioInterfaceFactory {
-
-public:
-	TheoraAudioInterface* createInstance(TheoraVideoClip* owner, int nChannels, int freq) {
-
-		printf("************** creating audio output\n");
-		TheoraAudioInterface* ta = new TPAudioGodot(owner, nChannels, freq);
-		return ta;
-	};
-};
-
-static TPAudioGodotFactory* audio_factory = NULL;
-
-void VideoStreamTheoraplayer::stop() {
-
-	playing = false;
-	if (clip) {
-		clip->stop();
-		clip->seek(0);
-	};
-	started = true;
-};
-
-void VideoStreamTheoraplayer::play() {
-	if (clip)
-		playing = true;
-};
-
-bool VideoStreamTheoraplayer::is_playing() const {
-
-	return playing;
-};
-
-void VideoStreamTheoraplayer::set_paused(bool p_paused) {
-
-	paused = p_paused;
-	if (paused) {
-		clip->pause();
-	} else {
-		if (clip && playing && !started)
-			clip->play();
-	}
-};
-
-bool VideoStreamTheoraplayer::is_paused(bool p_paused) const {
-
-	return !playing;
-};
-
-void VideoStreamTheoraplayer::set_loop(bool p_enable) {
-
-	loop = p_enable;
-};
-
-bool VideoStreamTheoraplayer::has_loop() const {
-
-	return loop;
-};
-
-float VideoStreamTheoraplayer::get_length() const {
-
-	if (!clip)
-		return 0;
-
-	return clip->getDuration();
-};
-
-
-float VideoStreamTheoraplayer::get_pos() const {
-
-	if (!clip)
-		return 0;
-
-	return clip->getTimer()->getTime();
-};
-
-void VideoStreamTheoraplayer::seek_pos(float p_time) {
-
-	if (!clip)
-		return;
-
-	clip->seek(p_time);
-};
-
-int VideoStreamTheoraplayer::get_pending_frame_count() const {
-
-	if (!clip)
-		return 0;
-
-	TheoraVideoFrame* f = clip->getNextFrame();
-	return f ? 1 : 0;
-};
-
-
-void VideoStreamTheoraplayer::pop_frame(Ref<ImageTexture> p_tex) {
-
-	if (!clip)
-		return;
-
-	TheoraVideoFrame* f = clip->getNextFrame();
-	if (!f) {
-		return;
-	};
-
-#ifdef GLES2_ENABLED
-//	RasterizerGLES2* r = RasterizerGLES2::get_singleton();
-//	r->_texture_set_data(p_tex, f->mBpp == 3 ? Image::Format_RGB : Image::Format_RGBA, f->mBpp, w, h, f->getBuffer());
-
-#endif
-
-	float w=clip->getWidth(),h=clip->getHeight();
-	int imgsize = w * h * f->mBpp;
-
-	int size = f->getStride() * f->getHeight() * f->mBpp;
-	data.resize(imgsize);
-	{
-		DVector<uint8_t>::Write wr = data.write();
-		uint8_t* ptr = wr.ptr();
-		copymem(ptr, f->getBuffer(), imgsize);
-	}
-    /*
-    for (int i=0; i<h; i++) {
-        int dstofs = i * w * f->mBpp;
-        int srcofs = i * f->getStride() * f->mBpp;
-        copymem(ptr + dstofs, f->getBuffer() + dstofs, w * f->mBpp);
-    };
-     */
-	Image frame = Image();
-	frame.create(w, h, 0, f->mBpp == 3 ? Image::FORMAT_RGB : Image::FORMAT_RGBA, data);
-
-	clip->popFrame();
-
-	if (p_tex->get_width() == 0) {
-		p_tex->create(frame.get_width(),frame.get_height(),frame.get_format(),Texture::FLAG_VIDEO_SURFACE|Texture::FLAG_FILTER);
-		p_tex->set_data(frame);
-	} else {
-
-		p_tex->set_data(frame);
-	};
-};
-
-/*
-Image VideoStreamTheoraplayer::pop_frame() {
-
-	Image ret = frame;
-	frame = Image();
-	return ret;
-};
-*/
-
-Image VideoStreamTheoraplayer::peek_frame() const {
-
-	return Image();
-};
-
-void VideoStreamTheoraplayer::update(float p_time) {
-
-	if (!mgr)
-		return;
-
-	if (!clip)
-		return;
-
-	if (!playing || paused)
-		return;
-
-	//printf("video update!\n");
-	if (started) {
-		if (clip->getNumReadyFrames() < 2) {
-			printf("frames not ready, returning!\n");
-			return;
-		};
-		started = false;
-		//printf("playing clip!\n");
-		clip->play();
-	} else if (clip->isDone()) {
-		playing = false;
-	};
-
-	mgr->update(p_time);
-};
-
-
-void VideoStreamTheoraplayer::set_audio_track(int p_idx) {
-	audio_track=p_idx;
-	if (clip)
-		clip->set_audio_track(audio_track);
-}
-
-void VideoStreamTheoraplayer::set_file(const String& p_file) {
-
-	FileAccess* f = FileAccess::open(p_file, FileAccess::READ);
-	if (!f || !f->is_open())
-		return;
-
-	if (!audio_factory) {
-		audio_factory = memnew(TPAudioGodotFactory);
-	};
-
-	if (mgr == NULL) {
-		mgr = memnew(TheoraVideoManager);
-		mgr->setAudioInterfaceFactory(audio_factory);
-	};
-
-	int track = GLOBAL_DEF("theora/audio_track", 0); // hack
-
-	if (p_file.find(".mp4") != -1) {
-		
-		std::string file = p_file.replace("res://", "").utf8().get_data();
-		clip = mgr->createVideoClip(file, TH_RGBX, 2, false, track);
-		//clip->set_audio_track(audio_track);
-		memdelete(f);
-
-	} else {
-
-		TheoraDataSource* ds = memnew(TPDataFA(f, p_file));
-
-		try {
-			clip = mgr->createVideoClip(ds);
-			clip->set_audio_track(audio_track);
-		} catch (_TheoraGenericException e) {
-			printf("exception ocurred! %s\n", e.repr().c_str());
-			clip = NULL;
-		};
-	};
-
-	clip->pause();
-	started = true;
-};
-
-VideoStreamTheoraplayer::~VideoStreamTheoraplayer() {
-
-	stop();
-	//if (mgr) { // this should be a singleton or static or something
-	//	memdelete(mgr);
-	//};
-	//mgr = NULL;
-	if (clip) {
-		mgr->destroyVideoClip(clip);
-		clip = NULL;
-	};
-};
-
-VideoStreamTheoraplayer::VideoStreamTheoraplayer() {
-
-	//mgr = NULL;
-	clip = NULL;
-	started = false;
-	playing = false;
-	paused = false;
-	loop = false;
-	audio_track=0;
-};
-
-
-RES ResourceFormatLoaderVideoStreamTheoraplayer::load(const String &p_path, const String& p_original_path, Error *r_error) {
-	if (r_error)
-		*r_error=OK;
-
-	VideoStreamTheoraplayer *stream = memnew(VideoStreamTheoraplayer);
-	stream->set_file(p_path);
-	return Ref<VideoStreamTheoraplayer>(stream);
-}
-
-void ResourceFormatLoaderVideoStreamTheoraplayer::get_recognized_extensions(List<String> *p_extensions) const {
-
-	p_extensions->push_back("ogm");
-	p_extensions->push_back("ogv");
-	p_extensions->push_back("mp4");
-}
-bool ResourceFormatLoaderVideoStreamTheoraplayer::handles_type(const String& p_type) const {
-	return p_type=="VideoStream" || p_type == "VideoStreamTheoraplayer";
-}
-
-String ResourceFormatLoaderVideoStreamTheoraplayer::get_resource_type(const String &p_path) const {
-
-	String exl=p_path.extension().to_lower();
-	if (exl=="ogm" || exl=="ogv" || exl=="mp4")
-		return "VideoStream";
-	return "";
-}
-
-
-
diff --git a/drivers/theoraplayer/video_stream_theoraplayer.h b/drivers/theoraplayer/video_stream_theoraplayer.h
deleted file mode 100644
index 69cae7c4a2..0000000000
--- a/drivers/theoraplayer/video_stream_theoraplayer.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef VIDEO_STREAM_THEORAPLAYER_H
-#define VIDEO_STREAM_THEORAPLAYER_H
-
-#include "scene/resources/video_stream.h"
-#include "io/resource_loader.h"
-#include "scene/resources/texture.h"
-
-class TheoraVideoManager;
-class TheoraVideoClip;
-
-class VideoStreamTheoraplayer : public VideoStream {
-
-	OBJ_TYPE(VideoStreamTheoraplayer,VideoStream);
-
-	mutable DVector<uint8_t> data;
-	TheoraVideoClip* clip;
-	bool started;
-	bool playing;
-	bool loop;
-	bool paused;
-
-	int audio_track;
-
-public:
-
-	virtual void stop();
-	virtual void play();
-
-	virtual bool is_playing() const;
-
-	virtual void set_paused(bool p_paused);
-	virtual bool is_paused(bool p_paused) const;
-
-	virtual void set_loop(bool p_enable);
-	virtual bool has_loop() const;
-
-	virtual float get_pos() const;
-	virtual void seek_pos(float p_time);
-
-	virtual float get_length() const;
-
-	virtual int get_pending_frame_count() const;
-	virtual void pop_frame(Ref<ImageTexture> p_tex);
-	virtual Image peek_frame() const;
-
-	void update(float p_time);
-
-	void set_file(const String& p_file);
-	void set_audio_track(int p_idx);
-
-	~VideoStreamTheoraplayer();
-	VideoStreamTheoraplayer();
-};
-
-class ResourceFormatLoaderVideoStreamTheoraplayer : public ResourceFormatLoader {
-public:
-	virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL);
-	virtual void get_recognized_extensions(List<String> *p_extensions) const;
-	virtual bool handles_type(const String& p_type) const;
-	virtual String get_resource_type(const String &p_path) const;
-
-};
-
-
-#endif
-
diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp
index 76042089ff..8e70ecc932 100644
--- a/drivers/unix/file_access_unix.cpp
+++ b/drivers/unix/file_access_unix.cpp
@@ -63,7 +63,7 @@ Error FileAccessUnix::_open(const String& p_path, int p_mode_flags) {
 		fclose(f);
 	f=NULL;
 
-	String path=fix_path(p_path);
+	path=fix_path(p_path);
 	//printf("opening %ls, %i\n", path.c_str(), Memory::get_static_mem_usage());
 
 	ERR_FAIL_COND_V(f,ERR_ALREADY_IN_USE);
@@ -114,6 +114,9 @@ void FileAccessUnix::close() {
 		return;
 	fclose(f);
 	f = NULL;
+	if (close_notification_func) {
+		close_notification_func(path,flags);
+	}
 	if (save_path!="") {
 
 		//unlink(save_path.utf8().get_data());
@@ -240,6 +243,7 @@ FileAccess * FileAccessUnix::create_libc() {
 	return memnew( FileAccessUnix );
 }
 
+CloseNotificationFunc FileAccessUnix::close_notification_func=NULL;
 
 FileAccessUnix::FileAccessUnix() {
 
diff --git a/drivers/unix/file_access_unix.h b/drivers/unix/file_access_unix.h
index 5b0f0e7cb7..6c41a51ec5 100644
--- a/drivers/unix/file_access_unix.h
+++ b/drivers/unix/file_access_unix.h
@@ -38,6 +38,10 @@
 /**
 	@author Juan Linietsky <reduzio@gmail.com>
 */
+
+
+typedef void (*CloseNotificationFunc)(const String& p_file,int p_flags);
+
 class FileAccessUnix : public FileAccess {
 	
 	FILE *f;
@@ -45,10 +49,13 @@ class FileAccessUnix : public FileAccess {
 	void check_errors() const;
 	mutable Error last_error;
 	String save_path;
+	String path;
 	
-		static FileAccess* create_libc();
+	static FileAccess* create_libc();
 public:
 	
+	static CloseNotificationFunc close_notification_func;
+
 	virtual Error _open(const String& p_path, int p_mode_flags); ///< open a file
 	virtual void close(); ///< close a file
 	virtual bool is_open() const; ///< true when file is open 
diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp
index 314e13cee4..96f90e6be1 100644
--- a/drivers/unix/os_unix.cpp
+++ b/drivers/unix/os_unix.cpp
@@ -226,8 +226,9 @@ uint64_t OS_Unix::get_unix_time() const {
 uint64_t OS_Unix::get_system_time_msec() const {
 	struct timeval tv_now;
 	gettimeofday(&tv_now, NULL);
-	localtime(&tv_now.tv_usec);
-	uint64_t msec = tv_now.tv_usec/1000;
+	//localtime(&tv_now.tv_usec);
+	//localtime((const long *)&tv_now.tv_usec);
+	uint64_t msec = uint64_t(tv_now.tv_sec)*1000+tv_now.tv_usec/1000;
 	return msec;
 }
 
diff --git a/drivers/vorbis/audio_stream_ogg_vorbis.cpp b/drivers/vorbis/audio_stream_ogg_vorbis.cpp
index ee9ba8da4d..ca055c8b62 100644
--- a/drivers/vorbis/audio_stream_ogg_vorbis.cpp
+++ b/drivers/vorbis/audio_stream_ogg_vorbis.cpp
@@ -30,7 +30,7 @@
 
 
 
-size_t AudioStreamOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_count, void *_f) {
+size_t AudioStreamPlaybackOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_count, void *_f) {
 
 	//printf("read to %p, %i bytes, %i nmemb, %p\n",p_dst,p_data,p_count,_f);
 	FileAccess *fa=(FileAccess*)_f;
@@ -46,7 +46,7 @@ size_t AudioStreamOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_c
 	return read;
 }
 
-int AudioStreamOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) {
+int AudioStreamPlaybackOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) {
 
 	//printf("seek to %p, offs %i, whence %i\n",_f,(int)offs,whence);
 
@@ -76,7 +76,7 @@ int AudioStreamOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) {
 #endif
 
 }
-int AudioStreamOGGVorbis::_ov_close_func(void *_f) {
+int AudioStreamPlaybackOGGVorbis::_ov_close_func(void *_f) {
 
 //	printf("close %p\n",_f);
 	if (!_f)
@@ -86,7 +86,7 @@ int AudioStreamOGGVorbis::_ov_close_func(void *_f) {
 		fa->close();
 	return 0;
 }
-long AudioStreamOGGVorbis::_ov_tell_func(void *_f) {
+long AudioStreamPlaybackOGGVorbis::_ov_tell_func(void *_f) {
 
 	//printf("close %p\n",_f);
 
@@ -95,38 +95,32 @@ long AudioStreamOGGVorbis::_ov_tell_func(void *_f) {
 }
 
 
-bool AudioStreamOGGVorbis::_can_mix() const {
 
-	return /*playing &&*/ !paused;
-}
-
-
-void AudioStreamOGGVorbis::update() {
+int AudioStreamPlaybackOGGVorbis::mix(int16_t* p_bufer,int p_frames) {
 
-	_THREAD_SAFE_METHOD_
-
-	if (!playing && !setting_up)
-		return;
+	if (!playing)
+		return 0;
 
+	int total=p_frames;
 	while (true) {
 
-		int todo = get_todo();
+		int todo = p_frames;
 
-		if (todo==0 || todo<MIN_MIX)
+		if (todo==0 || todo<MIN_MIX) {
 			break;
+		}
 
 		//printf("to mix %i - mix me %i bytes\n",to_mix,to_mix*stream_channels*sizeof(int16_t));
 
 		#ifdef BIG_ENDIAN_ENABLED
-		long ret=ov_read(&vf,(char*)get_write_buffer(),todo*stream_channels*sizeof(int16_t), 1, 2, 1, &current_section);
+		long ret=ov_read(&vf,(char*)p_bufer,todo*stream_channels*sizeof(int16_t), 1, 2, 1, &current_section);
 		#else
-		long ret=ov_read(&vf,(char*)get_write_buffer(),todo*stream_channels*sizeof(int16_t), 0, 2, 1, &current_section);
+		long ret=ov_read(&vf,(char*)p_bufer,todo*stream_channels*sizeof(int16_t), 0, 2, 1, &current_section);
 		#endif
+
 		if (ret<0) {
 
 			playing = false;
-			setting_up=false;
-
 			ERR_EXPLAIN("Error reading OGG Vorbis File: "+file);
 			ERR_BREAK(ret<0);
 		} else if (ret==0) { // end of song, reload?
@@ -138,9 +132,8 @@ void AudioStreamOGGVorbis::update() {
 			if (!has_loop()) {
 
 				playing=false;
-				setting_up=false;
 				repeats=1;
-				return;
+				break;
 			}
 
 			f=FileAccess::open(file,FileAccess::READ);
@@ -148,11 +141,22 @@ void AudioStreamOGGVorbis::update() {
 			int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks);
 			if (errv!=0) {
 				playing=false;
-				setting_up=false;
-				return; // :(
+				break;; // :(
 			}
 
-			frames_mixed=0;
+			if (loop_restart_time) {
+				bool ok = ov_time_seek(&vf,loop_restart_time)==0;
+				if (!ok) {
+					playing=false;
+					//ERR_EXPLAIN("loop restart time rejected");
+					ERR_PRINT("loop restart time rejected")
+				}
+
+				frames_mixed=stream_srate*loop_restart_time;
+			} else {
+
+				frames_mixed=0;
+			}
 			repeats++;
 			continue;
 
@@ -162,16 +166,19 @@ void AudioStreamOGGVorbis::update() {
 		ret/=sizeof(int16_t);
 
 		frames_mixed+=ret;
-		write(ret);
+
+		p_bufer+=ret*stream_channels;
+		p_frames-=ret;
+
 	}
 
-}
+	return total-p_frames;
 
+}
 
 
-void AudioStreamOGGVorbis::play() {
 
-	_THREAD_SAFE_METHOD_
+void AudioStreamPlaybackOGGVorbis::play(float p_from) {
 
 	if (playing)
 		stop();
@@ -179,56 +186,46 @@ void AudioStreamOGGVorbis::play() {
 	if (_load_stream()!=OK)
 		return;
 
+
 	frames_mixed=0;
-	playing=false;
-	setting_up=true;
-	update();
-	if (!setting_up)
-		return;
-	setting_up=false;
 	playing=true;
+	if (p_from>0) {
+		seek_pos(p_from);
+	}
 }
 
-void AudioStreamOGGVorbis::_close_file() {
+void AudioStreamPlaybackOGGVorbis::_close_file() {
 
 	if (f) {
+
 		memdelete(f);
 		f=NULL;
 	}
 }
 
-void AudioStreamOGGVorbis::stop() {
-
-	_THREAD_SAFE_METHOD_
+bool AudioStreamPlaybackOGGVorbis::is_playing() const {
+	return playing;
+}
+void AudioStreamPlaybackOGGVorbis::stop() {
 
 	_clear_stream();
 	playing=false;
-	_clear();
-}
-
-AudioStreamOGGVorbis::UpdateMode AudioStreamOGGVorbis::get_update_mode() const {
-
-	return UPDATE_THREAD;
+	//_clear();
 }
 
 
-bool AudioStreamOGGVorbis::is_playing() const {
 
+float AudioStreamPlaybackOGGVorbis::get_pos() const {
 
-	return playing || (get_total() - get_todo() -1 > 0);
-}
-
-float AudioStreamOGGVorbis::get_pos() const {
-
-	int32_t frames = int32_t(frames_mixed) - (int32_t(get_total()) - get_todo());
+	int32_t frames = int32_t(frames_mixed);
 	if (frames < 0)
 		frames=0;
 	return double(frames) / stream_srate;
 }
 
-void AudioStreamOGGVorbis::seek_pos(float p_time) {
+void AudioStreamPlaybackOGGVorbis::seek_pos(float p_time) {
+
 
-	_THREAD_SAFE_METHOD_
 
 	if (!playing)
 		return;
@@ -237,85 +234,107 @@ void AudioStreamOGGVorbis::seek_pos(float p_time) {
 	frames_mixed=stream_srate*p_time;
 }
 
-String AudioStreamOGGVorbis::get_stream_name() const  {
+String AudioStreamPlaybackOGGVorbis::get_stream_name() const  {
 
 	return "";
 }
 
-void AudioStreamOGGVorbis::set_loop(bool p_enable) {
+void AudioStreamPlaybackOGGVorbis::set_loop(bool p_enable) {
 
 	loops=p_enable;
 }
 
-bool AudioStreamOGGVorbis::has_loop() const {
+bool AudioStreamPlaybackOGGVorbis::has_loop() const {
 
 	return loops;
 }
 
-int AudioStreamOGGVorbis::get_loop_count() const {
+int AudioStreamPlaybackOGGVorbis::get_loop_count() const {
 	return repeats;
 }
 
 
-void AudioStreamOGGVorbis::set_file(const String& p_file) {
+Error AudioStreamPlaybackOGGVorbis::set_file(const String& p_file) {
 
 	file=p_file;
-}
-
-Error AudioStreamOGGVorbis::_load_stream()  {
-
-	_clear_stream();
-	if (file=="")
-		return ERR_INVALID_DATA;
-
+	stream_valid=false;
 	Error err;
 	f=FileAccess::open(file,FileAccess::READ,&err);
 
-
 	if (err) {
 		ERR_FAIL_COND_V( err, err );
 	}
 
 	int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks);
+	switch(errv) {
 
-
+		case OV_EREAD: { // - A read from media returned an error.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CANT_READ );
+		} break;
+		case OV_EVERSION:  // - Vorbis version mismatch.
+		case OV_ENOTVORBIS: { // - Bitstream is not Vorbis data.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
+		} break;
+		case OV_EBADHEADER: { // - Invalid Vorbis bitstream header.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_FILE_CORRUPT );
+		} break;
+		case OV_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
+			memdelete(f); f=NULL;
+			ERR_FAIL_V( ERR_BUG );
+		} break;
+	}
 	const vorbis_info *vinfo=ov_info(&vf,-1);
 	stream_channels=vinfo->channels;
 	stream_srate=vinfo->rate;
-	Error serr = _setup(stream_channels,stream_srate);
+	ogg_int64_t len = ov_time_total(&vf,-1);
+	length=len/1000.0;
+	ov_clear(&vf);
+	memdelete(f);
+	f=NULL;
+	stream_valid=true;
+
+
+	return OK;
+}
+
+Error AudioStreamPlaybackOGGVorbis::_load_stream()  {
+
+	ERR_FAIL_COND_V(!stream_valid,ERR_UNCONFIGURED);
 
-	if (serr) {
-		_close_file();
-		ERR_FAIL_V( ERR_INVALID_DATA );
+	_clear_stream();
+	if (file=="")
+		return ERR_INVALID_DATA;
+
+	Error err;
+	f=FileAccess::open(file,FileAccess::READ,&err);
+	if (err) {
+		ERR_FAIL_COND_V( err, err );
 	}
 
+	int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks);
 	switch(errv) {
 
 		case OV_EREAD: { // - A read from media returned an error.
-			_close_file();
+			memdelete(f); f=NULL;
 			ERR_FAIL_V( ERR_FILE_CANT_READ );
 		} break;
 		case OV_EVERSION:  // - Vorbis version mismatch.
 		case OV_ENOTVORBIS: { // - Bitstream is not Vorbis data.
-			_close_file();
+			memdelete(f); f=NULL;
 			ERR_FAIL_V( ERR_FILE_UNRECOGNIZED );
 		} break;
 		case OV_EBADHEADER: { // - Invalid Vorbis bitstream header.
-			_close_file();
+			memdelete(f); f=NULL;
 			ERR_FAIL_V( ERR_FILE_CORRUPT );
 		} break;
 		case OV_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption.
-
-			_close_file();
+			memdelete(f); f=NULL;
 			ERR_FAIL_V( ERR_BUG );
 		} break;
 	}
-
-
-	ogg_int64_t len = ov_time_total(&vf,-1);
-
-	length=len/1000.0;
-
 	repeats=0;
 	stream_loaded=true;
 
@@ -324,16 +343,16 @@ Error AudioStreamOGGVorbis::_load_stream()  {
 }
 
 
-float AudioStreamOGGVorbis::get_length() const {
+float AudioStreamPlaybackOGGVorbis::get_length() const {
 
 	if (!stream_loaded) {
-		if (const_cast<AudioStreamOGGVorbis*>(this)->_load_stream()!=OK)
+		if (const_cast<AudioStreamPlaybackOGGVorbis*>(this)->_load_stream()!=OK)
 			return 0;
 	}
 	return length;
 }
 
-void AudioStreamOGGVorbis::_clear_stream() {
+void AudioStreamPlaybackOGGVorbis::_clear_stream() {
 
 	if (!stream_loaded)
 		return;
@@ -346,18 +365,18 @@ void AudioStreamOGGVorbis::_clear_stream() {
 	playing=false;
 }
 
-void AudioStreamOGGVorbis::set_paused(bool p_paused) {
+void AudioStreamPlaybackOGGVorbis::set_paused(bool p_paused) {
 
 	paused=p_paused;
 }
 
-bool AudioStreamOGGVorbis::is_paused(bool p_paused) const {
+bool AudioStreamPlaybackOGGVorbis::is_paused(bool p_paused) const {
 
 	return paused;
 }
 
 
-AudioStreamOGGVorbis::AudioStreamOGGVorbis() {
+AudioStreamPlaybackOGGVorbis::AudioStreamPlaybackOGGVorbis() {
 
 	loops=false;
 	playing=false;
@@ -367,17 +386,18 @@ AudioStreamOGGVorbis::AudioStreamOGGVorbis() {
 	_ov_callbacks.tell_func=_ov_tell_func;
 	f = NULL;
 	stream_loaded=false;
-	repeats=0;
-	setting_up=false;
+	stream_valid=false;
+	repeats=0;	
 	paused=true;
 	stream_channels=0;
 	stream_srate=0;
 	current_section=0;
 	length=0;
+	loop_restart_time=0;
 }
 
 
-AudioStreamOGGVorbis::~AudioStreamOGGVorbis() {
+AudioStreamPlaybackOGGVorbis::~AudioStreamPlaybackOGGVorbis() {
 
 	_clear_stream();
 
diff --git a/drivers/vorbis/audio_stream_ogg_vorbis.h b/drivers/vorbis/audio_stream_ogg_vorbis.h
index 5e3649d980..827d8b0be3 100644
--- a/drivers/vorbis/audio_stream_ogg_vorbis.h
+++ b/drivers/vorbis/audio_stream_ogg_vorbis.h
@@ -29,17 +29,16 @@
 #ifndef AUDIO_STREAM_OGG_VORBIS_H
 #define AUDIO_STREAM_OGG_VORBIS_H
 
-#include "scene/resources/audio_stream_resampled.h"
+#include "scene/resources/audio_stream.h"
 #include "vorbis/vorbisfile.h"
 #include "os/file_access.h"
 #include "io/resource_loader.h"
 #include "os/thread_safe.h"
 
-class AudioStreamOGGVorbis : public AudioStreamResampled {
 
-	OBJ_TYPE(AudioStreamOGGVorbis,AudioStreamResampled);
-	_THREAD_SAFE_CLASS_
+class AudioStreamPlaybackOGGVorbis : public AudioStreamPlayback {
 
+	OBJ_TYPE(AudioStreamPlaybackOGGVorbis,AudioStreamPlayback);
 
 	enum {
 		MIN_MIX=1024
@@ -54,9 +53,6 @@ class AudioStreamOGGVorbis : public AudioStreamResampled {
 	static int _ov_close_func(void *_f);
 	static long _ov_tell_func(void *_f);
 
-
-	virtual bool _can_mix() const;
-
 	String file;
 	int64_t frames_mixed;
 
@@ -67,7 +63,7 @@ class AudioStreamOGGVorbis : public AudioStreamResampled {
 	int stream_srate;
 	int current_section;
 
-	volatile bool setting_up;
+
 	bool paused;
 	bool loops;
 	int repeats;
@@ -76,17 +72,21 @@ class AudioStreamOGGVorbis : public AudioStreamResampled {
 	void _clear_stream();
 	void _close_file();
 
+	bool stream_valid;
+	float loop_restart_time;
 
-public:
 
+public:
 
-	void set_file(const String& p_file);
 
+	Error set_file(const String& p_file);
 
-	virtual void play();
+	virtual void play(float p_from=0);
 	virtual void stop();
 	virtual bool is_playing() const;
 
+	virtual void set_loop_restart_time(float p_time) { loop_restart_time=0; }
+
 	virtual void set_paused(bool p_paused);
 	virtual bool is_paused(bool p_paused) const;
 
@@ -102,11 +102,32 @@ public:
 	virtual float get_pos() const;
 	virtual void seek_pos(float p_time);
 
-	virtual UpdateMode get_update_mode() const;
-	virtual void update();
+	virtual int get_channels() const { return stream_channels; }
+	virtual int get_mix_rate() const { return stream_srate; }
+
+	virtual int get_minimum_buffer_size() const { return 0; }
+	virtual int mix(int16_t* p_bufer,int p_frames);
+
+	AudioStreamPlaybackOGGVorbis();
+	~AudioStreamPlaybackOGGVorbis();
+};
+
+
+class AudioStreamOGGVorbis : public AudioStream {
+
+	OBJ_TYPE(AudioStreamOGGVorbis,AudioStream);
+
+	String file;
+public:
+
+	Ref<AudioStreamPlayback> instance_playback() {
+		Ref<AudioStreamPlaybackOGGVorbis> pb = memnew( AudioStreamPlaybackOGGVorbis );
+		pb->set_file(file);
+		return pb;
+	}
+
+	void set_file(const String& p_file) { file=p_file; }
 
-	AudioStreamOGGVorbis();
-	~AudioStreamOGGVorbis();
 };
 
 class ResourceFormatLoaderAudioStreamOGGVorbis : public ResourceFormatLoader {
diff --git a/drivers/webp/dsp/dsp.h b/drivers/webp/dsp/dsp.h
index afe30413c6..fd686a8532 100644
--- a/drivers/webp/dsp/dsp.h
+++ b/drivers/webp/dsp/dsp.h
@@ -29,7 +29,7 @@ extern "C" {
 #define WEBP_USE_SSE2
 #endif
 
-#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__)
 #define WEBP_ANDROID_NEON  // Android targets that might support NEON
 #endif